├── .github
│   └── workflows
│       └── ci.yml
├── .gitignore
├── README.md
├── requirements.txt
└── src
    ├── __init__.py
    ├── common
    │   ├── __init__.py
    │   ├── camera_utils.py
    │   ├── debug_utils.py
    │   ├── meshes.py
    │   ├── obj_utils.py
    │   └── shapes.py
    ├── examples
    │   ├── example1.png
    │   ├── example1.py
    │   ├── example1b.png
    │   ├── example1b.py
    │   ├── example4.mp4
    │   ├── example4.py
    │   ├── example4_target.png
    │   ├── example5.mp4
    │   ├── example5.py
    │   ├── example5b.mp4
    │   ├── example5b.py
    │   ├── example5b_target.png
    │   ├── example6.mp4
    │   ├── example6.py
    │   ├── example6_target.png
    │   ├── example6b.mp4
    │   ├── example6b.py
    │   ├── example6b_target.png
    │   ├── example7b.py
    │   ├── example7b_target1.png
    │   ├── example7b_target2.png
    │   ├── example7b_target3.png
    │   ├── example7b_target4.png
    │   └── teapot.obj
    ├── mesh_renderer
    │   ├── README.md
    │   ├── __init__.py
    │   ├── kernels
    │   │   ├── rasterize_triangles.cpp
    │   │   └── setup.py
    │   ├── mesh_renderer_test.py
    │   ├── rasterize.py
    │   ├── rasterize_triangles_ext.py
    │   ├── rasterize_triangles_python.py
    │   ├── rasterize_triangles_test.py
    │   ├── render.py
    │   ├── test_data
    │   │   ├── Barycentrics_Cube.png
    │   │   ├── Colored_Cube_0.png
    │   │   ├── Colored_Cube_1.png
    │   │   ├── External_Triangle.png
    │   │   ├── Gray_Cube_0.png
    │   │   ├── Gray_Cube_1.png
    │   │   ├── Inside_Box.png
    │   │   ├── Perspective_Corrected_Triangle.png
    │   │   ├── Simple_Tetrahedron.png
    │   │   ├── Simple_Triangle.png
    │   │   ├── Unlit_Cube_0.png
    │   │   └── Unlit_Cube_1.png
    │   └── test_utils.py
    └── soft_mesh_renderer
        ├── README.md
        ├── __init__.py
        ├── quadtree.py
        ├── rasterize.py
        ├── render.py
        └── test_rasterize.py

/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python
2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
3 | 
4 | name: Python application
5 | 
6 | on: [push, pull_request]
7 | 
8 | permissions:
9 |   contents: read
10 | 
11 | jobs:
12 |   build:
13 | 
14 |     runs-on: ubuntu-latest
15 | 
16 |     steps:
17 |     - uses: actions/checkout@v3
18 |     - name: Set up Python 3.10
19 |       uses: actions/setup-python@v3
20 |       with:
21 |         python-version: "3.10"
22 |     - name: Install dependencies
23 |       run: |
24 |         python -m pip install --upgrade pip
25 |         pip install flake8 pytest
26 |         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
27 |     - name: Test with pytest
28 |       run: |
29 |         pytest
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | 
3 | # Byte-compiled / optimized / DLL files
4 | __pycache__/
5 | *.py[cod]
6 | *$py.class
7 | 
8 | # C extensions
9 | *.so
10 | 
11 | # Distribution / packaging
12 | .Python
13 | build/
14 | develop-eggs/
15 | dist/
16 | downloads/
17 | eggs/
18 | .eggs/
19 | lib/
20 | lib64/
21 | parts/
22 | sdist/
23 | var/
24 | wheels/
25 | pip-wheel-metadata/
26 | share/python-wheels/
27 | *.egg-info/
28 | .installed.cfg
29 | *.egg
30 | MANIFEST
31 | 
32 | # PyInstaller
33 | # Usually these files are written by a python script from a template
34 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 92 | # install all needed dependencies. 93 | #Pipfile.lock 94 | 95 | # celery beat schedule file 96 | celerybeat-schedule 97 | 98 | # SageMath parsed files 99 | *.sage.py 100 | 101 | # Environments 102 | .env 103 | .venv 104 | env/ 105 | venv/ 106 | ENV/ 107 | env.bak/ 108 | venv.bak/ 109 | 110 | # Spyder project settings 111 | .spyderproject 112 | .spyproject 113 | 114 | # Rope project settings 115 | .ropeproject 116 | 117 | # mkdocs documentation 118 | /site 119 | 120 | # mypy 121 | .mypy_cache/ 122 | .dmypy.json 123 | dmypy.json 124 | 125 | # Pyre type checker 126 | .pyre/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | This repository contains implementations of two differentiable 3D mesh renderers using PyTorch: 4 | - `mesh_renderer`: A port of Google's [tf_mesh_renderer](https://github.com/google/tf_mesh_renderer) from Tensorflow to PyTorch. Based on the barycentric formulation from [Genova et al. 2018 "Unsupervised training for 3d morphable model regression."](https://openaccess.thecvf.com/content_cvpr_2018/papers/Genova_Unsupervised_Training_for_CVPR_2018_paper.pdf) 5 | - `soft_mesh_renderer`: An alternate implementation of [SoftRas](https://github.com/ShichenLiu/SoftRas) that I built for my own learning. Based on the probabilistic rasterization formulation by [Liu et al. 2019 "Soft Rasterizer: A Differentiable Renderer for Image-based 3D Reasoning"](https://arxiv.org/abs/1904.01786). 6 | 7 | # Setup 8 | 9 | 1. Create a virtual environment with `python3 -m venv env` 10 | 2. Activate it with `source env/bin/activate` 11 | 3. Install external dependencies with `pip install -r requirements.txt` 12 | 13 | Some additional setup is required to use the optimized kernel for the barycentric renderer. See [docs](https://github.com/andrewkchan/pytorch_mesh_renderer/blob/master/src/mesh_renderer/README.md) for more. 14 | 15 | # Testing 16 | 17 | Tests are included for both renderers. 18 | 19 | - mesh_renderer: See [mesh_renderer docs](https://github.com/andrewkchan/pytorch_mesh_renderer/blob/master/src/mesh_renderer/README.md) for how to run these tests. 20 | - soft_mesh_renderer: See [soft_mesh_renderer docs](https://github.com/andrewkchan/pytorch_mesh_renderer/blob/master/src/soft_mesh_renderer/README.md) for how to run these tests. 
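
# Quick example

The snippet below is a minimal sketch of calling the barycentric renderer, condensed from `src/examples/example1.py` (the module and argument names come from that example; run from the repository root, e.g. `python -m src.examples.example1`, so the package imports resolve):

```python
import numpy as np
import torch
from skimage import io

from src import mesh_renderer as mr
from src.common import obj_utils

# Load a mesh and add a batch dimension to the per-vertex tensors.
# Triangles are shared across the batch and stay unbatched.
vertices, triangles, normals = obj_utils.load_obj("src/examples/teapot.obj")
vertices, normals = vertices[None, :, :], normals[None, :, :]

# Camera, white per-vertex diffuse colors, and a single white point light.
eye = torch.tensor([[0.0, 0.0, 3.0]])
center = torch.tensor([[0.0, 0.0, 0.0]])
world_up = torch.tensor([[0.0, 1.0, 0.0]])
diffuse_colors = torch.ones_like(vertices)
light_positions = torch.tensor([[[0.0, 3.0, 0.0]]])
light_intensities = torch.ones([1, 1, 3])

# Produces a [batch_size, height, width, 4] RGBA image batch.
image = mr.render(vertices, triangles, normals, diffuse_colors,
                  eye, center, world_up, light_positions, light_intensities,
                  640, 480)
io.imsave("teapot.png",
          (np.clip(image[0].numpy(), 0., 1.) * 255.0).astype(np.uint8))
```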
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | filelock==3.13.1 2 | fsspec==2023.12.2 3 | imageio==2.33.1 4 | imageio-ffmpeg==0.4.9 5 | Jinja2==3.1.2 6 | lazy_loader==0.3 7 | MarkupSafe==2.1.3 8 | mpmath==1.3.0 9 | networkx==3.2.1 10 | numpy==1.26.2 11 | packaging==23.2 12 | Pillow==10.1.0 13 | scikit-image==0.22.0 14 | scipy==1.11.4 15 | sympy==1.12 16 | tifffile==2023.12.9 17 | torch==2.1.2 18 | typing_extensions==4.9.0 19 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/__init__.py -------------------------------------------------------------------------------- /src/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/common/__init__.py -------------------------------------------------------------------------------- /src/common/camera_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import math 6 | import numpy as np 7 | import torch 8 | 9 | 10 | def euler_matrices(angles): 11 | """Compute a XYZ Tait-Bryan (improper Euler angle) rotation. 12 | 13 | Return 4x4 matrices for convenient multiplication with other transformations. 14 | 15 | Args: 16 | angles: a [batch_size, 3] tensor containing X, Y, and Z angles in radians. 17 | 18 | Returns: 19 | a [batch_size, 4, 4] tensor of matrices. 20 | """ 21 | s = torch.sin(angles) 22 | c = torch.cos(angles) 23 | # Rename variables for readability in the matrix definition below. 24 | c0, c1, c2 = (c[:, 0], c[:, 1], c[:, 2]) 25 | s0, s1, s2 = (s[:, 0], s[:, 1], s[:, 2]) 26 | 27 | zeros = torch.zeros_like(s[:, 0]) 28 | ones = torch.ones_like(s[:, 0]) 29 | 30 | flattened = torch.cat( 31 | [ 32 | c2*c1, c2*s1*s0 - c0*s2, s2*s0 + c2*c0*s1, zeros, 33 | c1*s2, c2*c0 + s2*s1*s0, c0*s2*s1 - c2*s0, zeros, 34 | -s1, c1*s0, c1*c0, zeros, 35 | zeros, zeros, zeros, ones 36 | ], 37 | dim=0) 38 | reshaped = torch.reshape(flattened, [4, 4, -1]) 39 | # transpose dims [0, 1, 2] -> [2, 0, 1] 40 | reshaped = torch.transpose(reshaped, 0, 1) 41 | reshaped = torch.transpose(reshaped, 0, 2) 42 | return reshaped 43 | 44 | 45 | def look_at(eye, center, world_up): 46 | """Compute camera viewing matrices. 47 | 48 | Functionality mimes gluLookAt (external/GL/glu/include/GLU/glu.h). 49 | 50 | Args: 51 | eye: 2D float32 tensor with shape [batch_size, 3] containing the XYZ 52 | world space position of the camera. 53 | center: 2D float32 tensor with shape [batch_size, 3] containing a 54 | position along the center of the camera's gaze line. 55 | world_up: 2D float32 tensor with shape [batch_size, 3] specifying the 56 | world's up direction; the output camera will have no tilt with 57 | respect to this direction. 58 | 59 | Returns: 60 | A [batch_size, 4, 4] float tensor containing a right-handed camera 61 | extrinsics matrix that maps points from world space to points in eye 62 | space. 
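
    Example (illustrative): a camera at (0, 0, 3) looking at the origin
    with +Y up yields the identity rotation and a translation of
    (0, 0, -3):

        eye = torch.tensor([[0.0, 0.0, 3.0]])
        center = torch.tensor([[0.0, 0.0, 0.0]])
        world_up = torch.tensor([[0.0, 1.0, 0.0]])
        look_at(eye, center, world_up)[0]
        # tensor([[ 1.,  0.,  0.,  0.],
        #         [ 0.,  1.,  0.,  0.],
        #         [ 0.,  0.,  1., -3.],
        #         [ 0.,  0.,  0.,  1.]])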
63 | """ 64 | batch_size = center.shape[0] 65 | vector_degeneracy_cutoff = 1e-6 66 | forward = center - eye 67 | forward_norm = torch.linalg.norm(forward, ord=None, dim=1, keepdim=True).clone() 68 | np.testing.assert_array_less(vector_degeneracy_cutoff, forward_norm, 69 | err_msg="Camera matrix is degenerate because eye and center are close.") 70 | forward = forward/forward_norm 71 | 72 | to_side = torch.cross(forward, world_up, dim=-1) 73 | to_side_norm = torch.linalg.norm(to_side, ord=None, dim=1, keepdim=True).clone() 74 | np.testing.assert_array_less(vector_degeneracy_cutoff, to_side_norm, 75 | err_msg="Camera matrix is degenerate because up and gaze are too close " 76 | "or because up is degenerate.") 77 | to_side = to_side/to_side_norm 78 | cam_up = torch.cross(to_side, forward, dim=-1) 79 | 80 | w_column = torch.tensor( 81 | batch_size * [[0., 0., 0., 1.]], dtype=torch.float32) # [batch_size, 4] 82 | w_column = torch.reshape(w_column, [batch_size, 4, 1]) 83 | view_rotation = torch.stack( 84 | [to_side, cam_up, -forward, 85 | torch.zeros_like(to_side, dtype=torch.float32)], 86 | dim=1) # [batch_size, 4, 3] matrix 87 | view_rotation = torch.cat([view_rotation, w_column], 88 | dim=2) # [batch_size, 4, 4] 89 | 90 | identity_batch = torch.unsqueeze(torch.eye(3), 0).repeat([batch_size, 1, 1]) 91 | view_translation = torch.cat([identity_batch, torch.unsqueeze(-eye, 2)], 2) 92 | view_translation = torch.cat( 93 | [view_translation, 94 | torch.reshape(w_column, [batch_size, 1, 4])], 1) 95 | camera_matrices = torch.matmul(view_rotation, view_translation) 96 | return camera_matrices 97 | 98 | 99 | def perspective(aspect_ratio, fov_y, near_clip, far_clip): 100 | """Computes perspective transformation matrices. 101 | 102 | Functionality mimes gluPerspective (external/GL/glu/include/GLU/glu.h). 103 | See: 104 | https://unspecified.wordpress.com/2012/06/21/calculating-the-gluperspective-matrix-and-other-opengl-matrix-maths/ 105 | 106 | Args: 107 | aspect_ratio: float value specifying the image aspect ratio 108 | (width/height). 109 | fov_y: 1D float32 Tensor with shape [batch_size] specifying output 110 | vertical field of views in degrees. 111 | near_clip: 1D float32 Tensor with shape [batch_size] specifying near 112 | clipping plane distance. 113 | far_clip: 1D float32 Tensor with shape [batch_size] specifying far 114 | clipping plane distance. 115 | 116 | Returns: 117 | A [batch_size, 4, 4] float tensor that maps from right-handed points in 118 | eye space to left-handed points in clip space. 119 | """ 120 | # The multiplication of fov_y by pi/360.0 simultaneously converts to radians 121 | # and adds the half-angle factor of .5. 
122 | focal_lengths_y = 1.0 / torch.tan(fov_y * (math.pi / 360.0)) 123 | depth_range = far_clip - near_clip 124 | p_22 = -(far_clip + near_clip) / depth_range 125 | p_23 = -2.0 * (far_clip * near_clip / depth_range) 126 | 127 | zeros = torch.zeros_like(p_23, dtype=torch.float32) 128 | perspective_transform = torch.cat( 129 | [ 130 | focal_lengths_y / aspect_ratio, zeros, zeros, zeros, 131 | zeros, focal_lengths_y, zeros, zeros, 132 | zeros, zeros, p_22, p_23, 133 | zeros, zeros, -torch.ones_like(p_23, dtype=torch.float32), zeros 134 | ], dim=0) 135 | perspective_transform = torch.reshape(perspective_transform, [4, 4, -1]) 136 | # transpose dimensions [0, 1, 2] -> [2, 0, 1] 137 | perspective_transform = torch.transpose(perspective_transform, 0, 1) 138 | perspective_transform = torch.transpose(perspective_transform, 0, 2) 139 | return perspective_transform 140 | 141 | 142 | def transform_homogeneous(matrices, vertices): 143 | """Applies batched 4x4 homogeneous matrix transforms to 3D vertices. 144 | 145 | The vertices are input and output as row-major, but are interpreted as 146 | column vectors multiplied on the right-hand side of the matrices. More 147 | explicitly, this function computes (MV^T)^T. 148 | Vertices are assumed to be xyz, and are extended to xyzw with w=1. 149 | 150 | Args: 151 | matrices: a [batch_size, 4, 4] tensor of matrices. 152 | vertices: a [batch_size, N, 3] tensor of xyz vertices. 153 | 154 | Returns: 155 | a [batch_size, N , 4] tensor of xyzw vertices. 156 | 157 | Raises: 158 | ValueError: if matrices or vertices have the wrong number of dimensions. 159 | """ 160 | if len(matrices.shape) != 3: 161 | raise ValueError( 162 | "matrices must have 3 dimensions (missing batch dimension?)") 163 | if len(vertices.shape) != 3: 164 | raise ValueError( 165 | "vertices must have 3 dimensions (missing batch dimension?)") 166 | homogeneous_coord = torch.ones( 167 | [vertices.shape[0], vertices.shape[1], 1], dtype=torch.float32) 168 | vertices_homogeneous = torch.cat([vertices, homogeneous_coord], 2) 169 | 170 | return torch.matmul(vertices_homogeneous, matrices.transpose(1, 2)) 171 | -------------------------------------------------------------------------------- /src/common/debug_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def debug_tensor(tensor, msg=""): 4 | torch.set_printoptions(profile="full", linewidth=200) 5 | print("[debug tensor] {}".format(msg)) 6 | print(tensor) 7 | torch.set_printoptions(profile="default", linewidth=80) 8 | 9 | def check_isnan_isinf(tensor, msg=""): 10 | if torch.isnan(tensor).any() or torch.isinf(tensor).any(): 11 | raise ValueError(msg) -------------------------------------------------------------------------------- /src/common/meshes.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def compute_vertex_normals(vertices, triangles): 4 | """ 5 | Computes vertex normals for a triangle mesh by first computing 6 | face normals, then averaging the normals on incident vertices. 7 | The resulting vectors are normalized. 8 | 9 | Args: 10 | vertices: 3D float32 tensor with shape [batch_size, vertex_count, 3]. Each 11 | triplet is an xyz position in world space. 12 | triangles: 2D int32 tensor with shape [triangle_count, 3]. 13 | 14 | Returns: 15 | - A tensor with shape [batch_size, vertex_count, 3] providing per-vertex normal 16 | vectors. 
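
    Note: the accumulated cross products are unnormalized, and their magnitudes
    equal twice the triangle areas, so the per-vertex average is implicitly
    area-weighted before the final normalization.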
17 | """ 18 | batch_size = vertices.shape[0] 19 | normals = torch.zeros_like(vertices) 20 | for b in range(batch_size): 21 | # vertices_faces[i][j] gives the vertex corresponding to triangles[i][j] 22 | vertices_faces = vertices[b, triangles.long(), :] # [vertex_count, 3, 3] 23 | normals[b].index_add_(0, triangles[:, 0].long(), 24 | torch.cross(vertices_faces[:, 1] - vertices_faces[:, 0], 25 | vertices_faces[:, 2] - vertices_faces[:, 0]) 26 | ) 27 | normals[b].index_add_(0, triangles[:, 1].long(), 28 | torch.cross(vertices_faces[:, 2] - vertices_faces[:, 1], 29 | vertices_faces[:, 0] - vertices_faces[:, 1]) 30 | ) 31 | normals[b].index_add_(0, triangles[:, 2].long(), 32 | torch.cross(vertices_faces[:, 0] - vertices_faces[:, 2], 33 | vertices_faces[:, 1] - vertices_faces[:, 2]) 34 | ) 35 | normals = torch.nn.functional.normalize(normals, eps=1e-6, p=2, dim=-1) 36 | return normals -------------------------------------------------------------------------------- /src/common/obj_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | 4 | from . import meshes 5 | 6 | def load_obj(filename, normalize=True): 7 | """ 8 | Load Wavefront .obj file. 9 | This function only supports vertices (v x x x), normals (vn x x x), 10 | and faces (f x x x). Per-face-vertex normals are not supported; 11 | they will be averaged out so that each vertex gets exactly 1 normal. 12 | 13 | Returns: 14 | - vertices, faces, normals: Tuple of tensors with shapes 15 | ([vertex_count, 3], [triangle_count, 3], [vertex_count, 3]) 16 | and types (float32, int32, float32). 17 | """ 18 | 19 | vertices = [] 20 | all_normals = [] 21 | vertex_id_to_normals = {} 22 | faces = [] 23 | with open(filename) as f: 24 | lines = f.readlines() 25 | 26 | for line in lines: 27 | parts = line.split() 28 | if len(parts) == 0: 29 | continue 30 | if parts[0] == 'v': 31 | vertices.append([float(v) for v in parts[1:4]]) 32 | elif parts[0] == 'vn': 33 | all_normals.append([float(v) for v in parts[1:4]]) 34 | elif parts[0] == 'f': 35 | face_vertices = line.split()[1:] 36 | if len(face_vertices) > 3: 37 | print("warning: encountered a face with more than 3 vertices," + 38 | "extra vertices will be skipped") 39 | faces.append([int(face_vertex.split('/')[0]) for face_vertex in face_vertices[:3]]) 40 | if len(face_vertices[0].split('/')) > 2: 41 | # handle face-vertex normal spec: `f v1//vn1 v2//vn2 v3//vn3` 42 | for face_vertex in face_vertices[:3]: 43 | parts = face_vertex.split('/') 44 | vertex_id = int(parts[0]) - 1 45 | normal_id = int(parts[2]) - 1 46 | if vertex_id not in vertex_id_to_normals: 47 | vertex_id_to_normals[vertex_id] = [] 48 | vertex_id_to_normals[vertex_id].append(normal_id) 49 | 50 | vertices = torch.tensor(vertices, dtype=torch.float32) 51 | faces = torch.tensor(faces, dtype=torch.int32) - 1 52 | all_normals = torch.tensor(all_normals, dtype=torch.float32) 53 | normals = torch.zeros_like(vertices) 54 | 55 | if len(vertex_id_to_normals) == 0: 56 | normals = meshes.compute_vertex_normals( 57 | vertices[None, :, :], faces)[0] 58 | else: 59 | # average all face-vertex normals to a single normal vector per vertex 60 | for i in range(len(vertices)): 61 | if i not in vertex_id_to_normals: 62 | normals[i] = torch.ones(3) 63 | continue 64 | n = len(vertex_id_to_normals[i]) 65 | for j in vertex_id_to_normals[i]: 66 | normals[i] += all_normals[j] / n 67 | normals = torch.nn.functional.normalize(normals, p=2.0, dim=1) 68 | 69 | if normalize: 70 | # normalize into a unit cube 
centered around zero 71 | vertices -= vertices.min(0)[0][None, :] 72 | vertices /= torch.abs(vertices).max() 73 | vertices *= 2 74 | vertices -= vertices.max(0)[0][None, :] / 2 75 | 76 | return vertices, faces, normals 77 | 78 | def save_obj(filename, vertices, faces, normals=None): 79 | """ 80 | Save mesh to Wavefront .obj file. 81 | This function only supports vertices (v x x x), normals (vn x x x), 82 | and faces (f x x x). Per-face-vertex normals are not supported; 83 | normals must be the same shape as vertices and are assumed to be 1-1 to 84 | vertices. 85 | """ 86 | if len(vertices.shape) != 2 or vertices.shape[1] != 3: 87 | raise ValueError("vertices must have shape [vertex_count, 3]") 88 | if len(faces.shape) != 2 or faces.shape[1] != 3: 89 | raise ValueError("faces must have shape [triangle_count, 3]") 90 | if normals is not None: 91 | if len(normals.shape) != 2 or normals.shape[1] != 3: 92 | raise ValueError("normals must have shape [vertex_count, 3]") 93 | with open(filename, "w") as f: 94 | for vertex in vertices: 95 | f.write("v {} {} {}\n".format(vertex[0], vertex[1], vertex[2])) 96 | for face in faces: 97 | if normals is not None: 98 | f.write("f {}//{} {}//{} {}//{}\n".format( 99 | face[0] + 1, face[0] + 1, 100 | face[1] + 1, face[1] + 1, 101 | face[2] + 1, face[2] + 1 102 | )) 103 | else: 104 | f.write("f {} {} {}\n".format( 105 | face[0] + 1, 106 | face[1] + 1, 107 | face[2] + 1, 108 | )) 109 | if normals is not None: 110 | for normal in normals: 111 | f.write("vn {} {} {}\n".format(normal[0], normal[1], normal[2])) -------------------------------------------------------------------------------- /src/common/shapes.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | def sphere(radius, resolution=25): 5 | """ 6 | Creates a triangle mesh representing a sphere with the given radius. 7 | The mesh will be centered on the origin. 8 | 9 | Returns: A tuple (vertices, triangles, normals) 10 | - vertices: Float tensor of shape [num_vertices, 3] giving vertices in XYZ world space. 11 | - triangles: Int32 tensor of shape [num_triangles, 3] giving vertex IDs of triangles. 12 | The vertex IDs are ordered such that they wind CCW with respect to a viewer looking 13 | at the outside of the sphere. 14 | - normals: Float tensor of shape [num_vertices, 3] giving vertex normals in XYZ world space. 15 | The vectors are normalized. 16 | """ 17 | # We divide the sphere in K uniform longitude (phi) intervals. 18 | # Each longitude line starts and ends at the vertical poles of the sphere, 19 | # which are special vertices. Within each line, we will insert K vertices 20 | # between the poles by uniformly splitting latitude (theta). 21 | # 22 | # Thus, within the latitude lines not including the poles, we have equatorial 23 | # strips which are also uniformly split by the longitude lines. Each adjacent 24 | # pair of (theta, theta + theta_step) defines an equatorial strip, then when intersected 25 | # with an adjacent pair of phi, defines a quad on the surface of the sphere with 26 | # top-left corner at (theta, phi) and bottom-right corner at (theta + theta_step, 27 | # phi + phi_step). These quads are further split into 2 triangles each. 28 | # 29 | # The poles then connect to the adjacent latitude lines via the longitude lines. 30 | # Each pair of adjacent longitude lines (phi) forms a triangle. 
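    #
    # For example, resolution K = 3 gives K*K + 2 = 11 vertices and
    # 2*K*(K-1) + 2*K = 18 triangles: 12 from the equatorial quads plus
    # 6 connecting the poles.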
31 |     K = resolution
32 |     phi_step = 2.*np.pi/K
33 |     theta_step = np.pi/(K+1)
34 |     num_vertices = K**2 + 2
35 |     num_triangles = (2 * (K - 1)*K) + 2 * K
36 |     vertices = torch.zeros([num_vertices, 3], dtype=torch.float32)
37 |     triangles = torch.zeros([num_triangles, 3], dtype=torch.int32)
38 |     i = 0
39 |     # Vertex ids are grouped by latitude line:
40 |     # 0..K-1 are theta == 1*theta_step
41 |     # K..2K-1 are theta == 2*theta_step
42 |     # ...
43 |     # (K-1)*K..K*K-1 are theta == K*theta_step
44 |     for theta in np.linspace(theta_step, np.pi - theta_step, K, endpoint=True):
45 |         for phi in np.linspace(0., 2.*np.pi, K, endpoint=False):
46 |             vertices[i] = radius * torch.tensor([
47 |                 np.sin(theta) * np.sin(phi),
48 |                 np.cos(theta),
49 |                 np.sin(theta) * np.cos(phi),
50 |             ])
51 |             i += 1
52 |     # Last 2 vertex ids are the poles
53 |     vertices[num_vertices - 2] = torch.tensor([0., 1., 0.])
54 |     vertices[num_vertices - 1] = torch.tensor([0., -1., 0.])
55 | 
56 |     triangle_id = 0
57 |     for i in range(K-1):
58 |         for j in range(K):
59 |             top_left = i * K + j
60 |             top_right = i * K + (j + 1) % K  # wrap around the phi == 2*pi seam
61 |             bottom_left = (i + 1) * K + j
62 |             bottom_right = (i + 1) * K + (j + 1) % K
63 |             triangles[triangle_id] = torch.tensor([top_left, bottom_left, top_right])
64 |             triangles[triangle_id + 1] = torch.tensor([top_right, bottom_left, bottom_right])
65 |             triangle_id += 2
66 |     # connect top pole to topmost latitude line
67 |     for i in range(K):
68 |         left = i
69 |         right = (i + 1) % K  # wrap around the phi == 2*pi seam
70 |         top = num_vertices - 2
71 |         triangles[triangle_id] = torch.tensor([top, left, right])
72 |         triangle_id += 1
73 |     # connect bottom pole to bottommost latitude line
74 |     for i in range(K):
75 |         left = (K-1)*K + i
76 |         right = (K-1)*K + (i + 1) % K
77 |         bottom = num_vertices - 1
78 |         triangles[triangle_id] = torch.tensor([bottom, right, left])
79 |         triangle_id += 1
80 |     normals = torch.nn.functional.normalize(vertices, p=2.0, dim=-1)
81 |     return vertices, triangles, normals
82 | 
83 | def cube(size):
84 |     """
85 |     Creates a triangle mesh representing a cube with the given side length.
86 |     The mesh will be centered on the origin.
87 | 
88 |     Returns: A tuple (vertices, triangles, normals)
89 |     - vertices: Float tensor of shape [num_vertices, 3] giving vertices in XYZ world space.
90 |     - triangles: Int32 tensor of shape [num_triangles, 3] giving vertex IDs of triangles.
91 |       The vertex IDs are ordered such that they wind CCW with respect to a viewer looking
92 |       at the outside of the cube.
93 |     - normals: Float tensor of shape [num_vertices, 3] giving vertex normals in XYZ world space.
94 |       The vectors are normalized. Note that face-vertex normals are not supported and so
95 |       the vertex normals will be the average of the normals of the incident faces.
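
    For the cube below, this means each vertex normal points along its corner
    diagonal, i.e. (+/-1, +/-1, +/-1)/sqrt(3), so the cube is shaded smoothly
    rather than with flat faces.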
96 | """ 97 | vertices = 0.5 * size * torch.tensor( 98 | [[-1, -1, 1], [-1, -1, -1], [-1, 1, -1], [-1, 1, 1], [1, -1, 1], 99 | [1, -1, -1], [1, 1, -1], [1, 1, 1]], 100 | dtype=torch.float32) 101 | normals = torch.nn.functional.normalize(vertices, p=2.0, dim=-1) 102 | triangles = torch.tensor( 103 | [ 104 | [2, 1, 0], 105 | [0, 3, 2], 106 | [6, 2, 3], 107 | [3, 7, 6], 108 | [5, 6, 7], 109 | [7, 4, 5], 110 | [1, 5, 4], 111 | [4, 0, 1], 112 | [2, 6, 5], 113 | [5, 1, 2], 114 | [0, 4, 7], 115 | [7, 3, 0] 116 | ], 117 | dtype=torch.int32) 118 | return vertices, triangles, normals -------------------------------------------------------------------------------- /src/examples/example1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/examples/example1.png -------------------------------------------------------------------------------- /src/examples/example1.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example 1: Rendering a teapot from arbitrary angle. 3 | """ 4 | 5 | import os 6 | import argparse 7 | 8 | import torch 9 | import numpy as np 10 | from skimage import io 11 | 12 | from .. import mesh_renderer as mr 13 | from ..common import obj_utils 14 | 15 | current_dir = os.path.dirname(os.path.realpath(__file__)) 16 | data_dir = os.path.join(current_dir, '.') 17 | 18 | if __name__ == "__main__": 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument('-i', '--filename_input', type=str, default=os.path.join(data_dir, 'teapot.obj')) 21 | parser.add_argument('-o', '--filename_output', type=str, default=os.path.join(data_dir, 'example1.png')) 22 | args = parser.parse_args() 23 | 24 | # load obj file 25 | vertices, triangles, normals = obj_utils.load_obj(args.filename_input) 26 | vertices = vertices[None,:,:] # [num_vertices, 3] -> [batch_size=1, num_vertices, 3] 27 | # TODO why are triangles not batched? 
28 | normals = normals[None,:,:] # [num_vertices, 3] -> [batch_size=1, num_vertices, 3] 29 | 30 | # camera position: 31 | eye = torch.tensor([[0.0, 0.0, 3.0]], dtype=torch.float32) 32 | center = torch.tensor([[0.0, 0.0, 0.0]], dtype=torch.float32) 33 | world_up = torch.tensor([[0.0, 1.0, 0.0]], dtype=torch.float32) 34 | 35 | # create a diffuse colors tensor coloring all vertices white 36 | vertex_diffuse_colors = torch.ones_like(vertices, dtype=torch.float32) 37 | 38 | light_positions = torch.tensor([[[0.0, 3.0, 0.0]]], dtype=torch.float32) 39 | light_intensities = torch.ones([1, 1, 3], dtype=torch.float32) 40 | 41 | image_width = 640 42 | image_height = 480 43 | 44 | render = mr.render( 45 | vertices, triangles, normals, 46 | vertex_diffuse_colors, eye, center, world_up, light_positions, 47 | light_intensities, image_width, image_height) 48 | render = torch.reshape(render, [image_height, image_width, 4]) 49 | result_image = render.numpy() 50 | result_image = np.clip(result_image, 0., 1.).copy(order="C") 51 | 52 | io.imsave(args.filename_output, (result_image * 255.0).astype(np.uint8)) 53 | -------------------------------------------------------------------------------- /src/examples/example1b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/examples/example1b.png -------------------------------------------------------------------------------- /src/examples/example1b.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example 1b: Rendering a teapot from arbitrary angle with the soft rasterizer. 3 | """ 4 | 5 | import os 6 | import argparse 7 | 8 | import torch 9 | import numpy as np 10 | from skimage import io 11 | 12 | from .. import soft_mesh_renderer as smr 13 | from ..common import obj_utils 14 | 15 | current_dir = os.path.dirname(os.path.realpath(__file__)) 16 | data_dir = os.path.join(current_dir, '.') 17 | 18 | if __name__ == "__main__": 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument('-i', '--filename_input', type=str, default=os.path.join(data_dir, 'teapot.obj')) 21 | parser.add_argument('-o', '--filename_output', type=str, default=os.path.join(data_dir, 'example1b.png')) 22 | args = parser.parse_args() 23 | 24 | # load obj file 25 | vertices, triangles, _ = obj_utils.load_obj(args.filename_input) 26 | vertices = vertices[None,:,:] # [num_vertices, 3] -> [batch_size=1, num_vertices, 3] 27 | # TODO why are triangles not batched? 28 | 29 | # camera position: 30 | eye = torch.tensor([[0.0, 0.0, 3.0]], dtype=torch.float32) 31 | center = torch.tensor([[0.0, 0.0, 0.0]], dtype=torch.float32) 32 | world_up = torch.tensor([[0.0, 1.0, 0.0]], dtype=torch.float32) 33 | 34 | # create a diffuse colors tensor coloring all vertices white 35 | vertex_diffuse_colors = torch.ones_like(vertices, dtype=torch.float32) 36 | 37 | light_positions = torch.tensor([[[0.0, 3.0, 0.0]]], dtype=torch.float32) 38 | light_intensities = torch.ones([1, 1], dtype=torch.float32) 39 | 40 | image_width = 100 41 | image_height = 100 42 | 43 | render = smr.render( 44 | vertices, 45 | triangles, 46 | vertex_diffuse_colors, 47 | eye, 48 | center, 49 | world_up, 50 | light_positions, 51 | light_intensities, 52 | image_width, 53 | image_height 54 | ) 55 | render = torch.reshape(render, [image_height, image_width, 4]) 56 | # Binarize the alpha channel to 0 or 1. 
In the raw output of the soft renderer, 57 | # it represents the probability that a triangle occupies the pixel. This will be 58 | # less than 1.0 for any pixel which is not entirely covered by a triangle, even if 59 | # the pixel is technically completely covered when considering all triangles. If we 60 | # don't binarize the value, we will get seams in the output along triangle edges. 61 | render[..., 3] = 1.0 * (render[..., 3] > 0.0) 62 | result_image = render.numpy() 63 | result_image = np.clip(result_image, 0., 1.).copy(order="C") 64 | 65 | io.imsave(args.filename_output, (result_image * 255.0).astype(np.uint8)) 66 | -------------------------------------------------------------------------------- /src/examples/example4.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/examples/example4.mp4 -------------------------------------------------------------------------------- /src/examples/example4.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example 4: Optimizing camera angles to reconstruct a teapot render. 3 | 4 | This example doesn't converge with the barycentric-based differentiable renderer. 5 | """ 6 | 7 | import os 8 | import argparse 9 | 10 | import torch 11 | import numpy as np 12 | from skimage import io 13 | import imageio 14 | import matplotlib.pyplot as plt 15 | 16 | from .. import mesh_renderer as mr 17 | from ..common import obj_utils 18 | from ..common import camera_utils 19 | 20 | current_dir = os.path.dirname(os.path.realpath(__file__)) 21 | data_dir = os.path.join(current_dir, '.') 22 | 23 | if __name__ == "__main__": 24 | parser = argparse.ArgumentParser() 25 | parser.add_argument('-i', '--filename_input', type=str, default=os.path.join(data_dir, 'teapot.obj')) 26 | parser.add_argument('-t', '--filename_target', type=str, default=os.path.join(data_dir, 'example4_target.png')) 27 | parser.add_argument('-o', '--filename_output', type=str, default=os.path.join(data_dir, 'example4.mp4')) 28 | args = parser.parse_args() 29 | 30 | # load obj file 31 | vertices, triangles, normals = obj_utils.load_obj(args.filename_input) 32 | vertices = vertices[None,:,:] # [num_vertices, 3] -> [batch_size=1, num_vertices, 3] 33 | # TODO why are triangles not batched? 
34 | normals = normals[None,:,:] # [num_vertices, 3] -> [batch_size=1, num_vertices, 3] 35 | 36 | image_width = 640 37 | image_height = 480 38 | 39 | target_render = torch.tensor( 40 | io.imread(args.filename_target).astype(float) / 255.0 41 | )[None,:,:,:] # [image_width, image_height, 4] -> [batch_size=1, image_width, image_height, 4] 42 | 43 | # create a diffuse colors tensor coloring all vertices white 44 | vertex_diffuse_colors = torch.ones_like(vertices, dtype=torch.float32) 45 | 46 | light_positions = torch.tensor([[[0.0, 3.0, 0.0]]], dtype=torch.float32) 47 | light_intensities = torch.ones([1, 1, 3], dtype=torch.float32) 48 | 49 | # camera position: 50 | # initial_eye = torch.tensor([0.0, 2.0, 3.0], dtype=torch.float32) 51 | # initial_world_up = torch.tensor([0.0, 3.0, -2.0], dtype=torch.float32) 52 | initial_eye = torch.tensor([0.0, 3.0, 3.0], dtype=torch.float32) 53 | initial_world_up = torch.tensor([0.0, 3.0, -3.0], dtype=torch.float32) 54 | eye = torch.tensor(initial_eye[None,:], dtype=torch.float32, requires_grad=True) 55 | camera_euler_angles = torch.tensor([[0.0, 0.0, 0.0]], dtype=torch.float32, requires_grad=False) 56 | 57 | writer = imageio.get_writer(args.filename_output, fps=20) 58 | optimizer = torch.optim.SGD([eye, camera_euler_angles], 0.7, 0.1) 59 | def stepfn(): 60 | optimizer.zero_grad() 61 | 62 | camera_euler_transforms = camera_utils.euler_matrices(camera_euler_angles)[0, :3, :3] # [3, 3] 63 | forward = torch.reshape(torch.matmul(-initial_eye, camera_euler_transforms.T), [1, 3]) 64 | world_up = torch.reshape(torch.matmul(initial_world_up, camera_euler_transforms.T), [1, 3]) 65 | center = eye + forward 66 | render = mr.render( 67 | vertices, triangles, normals, 68 | vertex_diffuse_colors, eye, center, world_up, light_positions, 69 | light_intensities, image_width, image_height) 70 | 71 | # write to GIF output 72 | frame = render[0].detach().numpy() # [image_height, image_width, 4] 73 | # black background 74 | frame = np.concatenate([ 75 | frame[:,:,:3]*frame[:,:,3][:,:,None], 76 | np.ones([image_height, image_width, 1], dtype=np.float32) 77 | ], axis=-1) 78 | writer.append_data((255*frame).astype(np.uint8)) 79 | 80 | loss = torch.mean(torch.abs(render - target_render)) 81 | loss.backward() 82 | torch.nn.utils.clip_grad_norm_([eye, center, world_up], 1.0) 83 | return loss 84 | 85 | epochs = 50 86 | loss_points = [] 87 | for e in range(epochs): 88 | print("step {} of {}".format(e, epochs)) 89 | loss = optimizer.step(stepfn) 90 | loss_points.append(float(loss)) 91 | 92 | writer.close() 93 | 94 | x = np.arange(0, epochs, 1) 95 | y = np.array(loss_points) 96 | plt.plot(x, y) 97 | plt.show() 98 | 99 | 100 | -------------------------------------------------------------------------------- /src/examples/example4_target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/examples/example4_target.png -------------------------------------------------------------------------------- /src/examples/example5.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/examples/example5.mp4 -------------------------------------------------------------------------------- /src/examples/example5.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example 5: 
Optimizing rotation of a cube. 3 | """ 4 | 5 | import os 6 | import argparse 7 | 8 | import torch 9 | import numpy as np 10 | from skimage import io 11 | import imageio 12 | import matplotlib.pyplot as plt 13 | 14 | from .. import mesh_renderer as mr 15 | from ..common import camera_utils, shapes 16 | 17 | current_dir = os.path.dirname(os.path.realpath(__file__)) 18 | data_dir = os.path.join(current_dir, '.') 19 | 20 | if __name__ == "__main__": 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument('-t', '--filename_target', type=str, default=os.path.join(data_dir, '../mesh_renderer/test_data/Gray_Cube_0.png')) 23 | parser.add_argument('-o', '--filename_output', type=str, default=os.path.join(data_dir, 'example5.mp4')) 24 | args = parser.parse_args() 25 | 26 | image_width = 640 27 | image_height = 480 28 | 29 | # Set up a basic cube centered at the origin, with vertex normals pointing 30 | # outwards along the line from the origin to the cube vertices: 31 | cube_vertices, cube_triangles, cube_normals = shapes.cube(2.) 32 | cube_triangles = torch.flip(cube_triangles, [1]) # CCW -> CW 33 | 34 | initial_euler_angles = [[0.0, 0.0, 0.0]] 35 | euler_angles = torch.tensor(initial_euler_angles, requires_grad=True) 36 | 37 | def render_cube_with_rotation(input_euler_angles): 38 | model_rotation = camera_utils.euler_matrices(input_euler_angles)[0, :3, :3] # [3, 3] 39 | 40 | vertices_world_space = torch.reshape( 41 | torch.matmul(cube_vertices, model_rotation.T), 42 | [1, 8, 3]) 43 | 44 | normals_world_space = torch.reshape( 45 | torch.matmul(cube_normals, model_rotation.T), 46 | [1, 8, 3]) 47 | 48 | # camera position: 49 | eye = torch.tensor([[0.0, 0.0, 6.0]], dtype=torch.float32) 50 | center = torch.tensor([[0.0, 0.0, 0.0]], dtype=torch.float32) 51 | world_up = torch.tensor([[0.0, 1.0, 0.0]], dtype=torch.float32) 52 | 53 | vertex_diffuse_colors = torch.ones_like(vertices_world_space, dtype=torch.float32) 54 | light_positions = torch.reshape(eye, [1, 1, 3]) 55 | light_intensities = torch.ones([1, 1, 3], dtype=torch.float32) 56 | 57 | render = mr.render( 58 | vertices_world_space, cube_triangles, normals_world_space, 59 | vertex_diffuse_colors, eye, center, world_up, light_positions, 60 | light_intensities, image_width, image_height) 61 | render = torch.reshape(render, [image_height, image_width, 4]) 62 | return render 63 | 64 | target_render = torch.tensor( 65 | io.imread(args.filename_target).astype(float) / 255.0 66 | ) # [image_width, image_height, 4] 67 | 68 | writer = imageio.get_writer(args.filename_output, fps=20) 69 | optimizer = torch.optim.SGD([euler_angles], 0.7, 0.1) 70 | def stepfn(): 71 | optimizer.zero_grad() 72 | render = render_cube_with_rotation(euler_angles) 73 | 74 | # write to GIF output 75 | frame = render.detach().numpy() # [image_height, image_width, 4] 76 | # black background 77 | frame = np.concatenate([ 78 | frame[:,:,:3]*frame[:,:,3][:,:,None], 79 | np.ones([image_height, image_width, 1], dtype=np.float32) 80 | ], axis=-1) 81 | writer.append_data((255*frame).astype(np.uint8)) 82 | 83 | loss = torch.mean(torch.abs(render - target_render)) 84 | loss.backward() 85 | torch.nn.utils.clip_grad_norm_([euler_angles], 1.0) 86 | return loss 87 | 88 | epochs = 50 89 | loss_points = [] 90 | for e in range(epochs): 91 | print("step {} of {}".format(e, epochs)) 92 | loss = optimizer.step(stepfn) 93 | loss_points.append(float(loss)) 94 | 95 | writer.close() 96 | 97 | x = np.arange(0, epochs, 1) 98 | y = np.array(loss_points) 99 | plt.plot(x, y) 100 | plt.show() 101 | 
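# (The positional arguments to torch.optim.SGD above are lr=0.7 and momentum=0.1;
# the same recipe is used by the other optimization examples in this directory.)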
-------------------------------------------------------------------------------- /src/examples/example5b.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/examples/example5b.mp4 -------------------------------------------------------------------------------- /src/examples/example5b.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example 5b: Optimizing rotation of a cube with the soft rasterizer. 3 | """ 4 | 5 | import os 6 | import argparse 7 | 8 | import torch 9 | import numpy as np 10 | from skimage import io 11 | import imageio 12 | import matplotlib.pyplot as plt 13 | 14 | from .. import soft_mesh_renderer as smr 15 | from ..common import camera_utils, shapes 16 | 17 | current_dir = os.path.dirname(os.path.realpath(__file__)) 18 | data_dir = os.path.join(current_dir, '.') 19 | 20 | if __name__ == "__main__": 21 | parser = argparse.ArgumentParser() 22 | 23 | parser.add_argument('-t', '--filename_target', type=str, default=os.path.join(data_dir, 'example5b_target.png')) 24 | parser.add_argument('-o', '--filename_output', type=str, default=os.path.join(data_dir, 'example5b.mp4')) 25 | args = parser.parse_args() 26 | 27 | image_width = 100 28 | image_height = 100 29 | 30 | # Set up a basic cube centered at the origin, with vertex normals pointing 31 | # outwards along the line from the origin to the cube vertices: 32 | cube_vertices, cube_triangles, _ = shapes.cube(2.) 33 | 34 | initial_euler_angles = [[0.0, 0.0, 0.0]] 35 | euler_angles = torch.tensor(initial_euler_angles, requires_grad=True) 36 | 37 | def render_cube_with_rotation(input_euler_angles): 38 | model_rotation = camera_utils.euler_matrices(input_euler_angles)[0, :3, :3] # [3, 3] 39 | 40 | vertices_world_space = torch.reshape( 41 | torch.matmul(cube_vertices, model_rotation.T), 42 | [1, 8, 3]) 43 | 44 | # camera position: 45 | eye = torch.tensor([[0.0, 0.0, 6.0]], dtype=torch.float32) 46 | center = torch.tensor([[0.0, 0.0, 0.0]], dtype=torch.float32) 47 | world_up = torch.tensor([[0.0, 1.0, 0.0]], dtype=torch.float32) 48 | 49 | vertex_diffuse_colors = torch.ones_like(vertices_world_space, dtype=torch.float32) 50 | light_positions = torch.reshape(eye, [1, 1, 3]) 51 | light_intensities = torch.ones([1, 1], dtype=torch.float32) 52 | 53 | render = smr.render( 54 | vertices_world_space, 55 | cube_triangles, 56 | vertex_diffuse_colors, 57 | eye, 58 | center, 59 | world_up, 60 | light_positions, 61 | light_intensities, 62 | image_width, 63 | image_height, 64 | ) 65 | render = torch.reshape(render, [image_height, image_width, 4]) 66 | return render 67 | 68 | """ 69 | Target was generated with: 70 | ``` 71 | target_euler_angles = torch.tensor([[-20.0, 0.0, 60.0]]) 72 | ``` 73 | """ 74 | target_render = torch.tensor( 75 | io.imread(args.filename_target).astype(float) / 255.0 76 | ) # [image_width, image_height, 4] 77 | 78 | writer = imageio.get_writer(args.filename_output, fps=20) 79 | optimizer = torch.optim.SGD([euler_angles], 0.7, 0.1) 80 | def stepfn(): 81 | optimizer.zero_grad() 82 | render = render_cube_with_rotation(euler_angles) 83 | 84 | # write to GIF output 85 | frame = render.detach().numpy() # [image_height, image_width, 4] 86 | # black background 87 | frame = np.concatenate([ 88 | frame[:,:,:3]*frame[:,:,3][:,:,None], 89 | np.ones([image_height, image_width, 1], dtype=np.float32) 90 | ], axis=-1) 91 | 
writer.append_data((255*frame).astype(np.uint8)) 92 | 93 | loss = torch.mean(torch.abs(render - target_render)) 94 | loss.backward() 95 | torch.nn.utils.clip_grad_norm_([euler_angles], 1.0) 96 | return loss 97 | 98 | epochs = 50 99 | loss_points = [] 100 | for e in range(epochs): 101 | print("step {} of {}".format(e, epochs)) 102 | loss = optimizer.step(stepfn) 103 | loss_points.append(float(loss)) 104 | 105 | writer.close() 106 | 107 | x = np.arange(0, epochs, 1) 108 | y = np.array(loss_points) 109 | plt.plot(x, y) 110 | plt.show() 111 | -------------------------------------------------------------------------------- /src/examples/example5b_target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/examples/example5b_target.png -------------------------------------------------------------------------------- /src/examples/example6.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/examples/example6.mp4 -------------------------------------------------------------------------------- /src/examples/example6.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example 6: Optimizing rotation of a teapot. 3 | 4 | This example converges for small perturbations in rotation but not larger perturbations 5 | using the barycentric-differentiable renderer. 6 | """ 7 | 8 | import os 9 | import argparse 10 | 11 | import torch 12 | import numpy as np 13 | from skimage import io 14 | import imageio 15 | import matplotlib.pyplot as plt 16 | 17 | from .. import mesh_renderer as mr 18 | from ..common import obj_utils 19 | from ..common import camera_utils 20 | 21 | current_dir = os.path.dirname(os.path.realpath(__file__)) 22 | data_dir = os.path.join(current_dir, '.') 23 | 24 | if __name__ == "__main__": 25 | parser = argparse.ArgumentParser() 26 | parser.add_argument('-i', '--filename_input', type=str, default=os.path.join(data_dir, 'teapot.obj')) 27 | parser.add_argument('-t', '--filename_target', type=str, default=os.path.join(data_dir, 'example6_target.png')) 28 | parser.add_argument('-o', '--filename_output', type=str, default=os.path.join(data_dir, 'example6.mp4')) 29 | args = parser.parse_args() 30 | 31 | image_width = 640 32 | image_height = 480 33 | 34 | # load obj file 35 | vertices, triangles, normals = obj_utils.load_obj(args.filename_input) 36 | vertices = vertices[None,:,:] # [num_vertices, 3] -> [batch_size=1, num_vertices, 3] 37 | # TODO why are triangles not batched? 
38 | normals = normals[None,:,:] # [num_vertices, 3] -> [batch_size=1, num_vertices, 3] 39 | 40 | # camera position: 41 | eye = torch.tensor([[0.0, 3.0, 3.0]], dtype=torch.float32) 42 | center = torch.tensor([[0.0, 0.0, 0.0]], dtype=torch.float32) 43 | world_up = torch.tensor([0.0, np.cos(-np.pi/4.), np.sin(-np.pi/4.)], dtype=torch.float32) 44 | 45 | vertex_diffuse_colors = torch.ones_like(vertices, dtype=torch.float32) 46 | light_positions = torch.tensor([[[0.0, 3.0, 0.0]]], dtype=torch.float32) 47 | light_intensities = torch.ones([1, 1, 3], dtype=torch.float32) 48 | 49 | initial_euler_angles = [[np.pi/4., 0., 0.]] 50 | euler_angles = torch.tensor(initial_euler_angles, requires_grad=True) 51 | 52 | def render_with_rotation(input_euler_angles): 53 | model_rotation = camera_utils.euler_matrices(input_euler_angles)[0, :3, :3] # [3, 3] 54 | 55 | vertices_world_space = torch.matmul(vertices, model_rotation.T) 56 | # normals must be transformed using the inverse of the transpose of a matrix M 57 | normals_world_space = torch.matmul(normals, torch.inverse(model_rotation.T).T) 58 | 59 | render = mr.render( 60 | vertices_world_space, triangles, normals_world_space, 61 | vertex_diffuse_colors, eye, center, world_up, light_positions, 62 | light_intensities, image_width, image_height) 63 | render = torch.reshape(render, [image_height, image_width, 4]) 64 | return render 65 | 66 | target_render = torch.tensor( 67 | io.imread(args.filename_target).astype(float) / 255.0 68 | ) # [image_width, image_height, 4] 69 | 70 | writer = imageio.get_writer(args.filename_output, fps=20) 71 | optimizer = torch.optim.SGD([euler_angles], 0.7, 0.1) 72 | def stepfn(): 73 | optimizer.zero_grad() 74 | render = render_with_rotation(euler_angles) 75 | 76 | # write to GIF output 77 | frame = render.detach().numpy() # [image_height, image_width, 4] 78 | # black background 79 | frame = np.concatenate([ 80 | frame[:,:,:3]*frame[:,:,3][:,:,None], 81 | np.ones([image_height, image_width, 1], dtype=np.float32) 82 | ], axis=-1) 83 | writer.append_data((255*frame).astype(np.uint8)) 84 | 85 | loss = torch.mean(torch.abs(render - target_render)) 86 | loss.backward() 87 | torch.nn.utils.clip_grad_norm_([euler_angles], 1.0) 88 | return loss 89 | 90 | epochs = 50 91 | loss_points = [] 92 | for e in range(epochs): 93 | print("step {} of {}".format(e, epochs)) 94 | loss = optimizer.step(stepfn) 95 | loss_points.append(float(loss)) 96 | 97 | writer.close() 98 | 99 | x = np.arange(0, epochs, 1) 100 | y = np.array(loss_points) 101 | plt.plot(x, y) 102 | plt.show() 103 | -------------------------------------------------------------------------------- /src/examples/example6_target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/examples/example6_target.png -------------------------------------------------------------------------------- /src/examples/example6b.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/examples/example6b.mp4 -------------------------------------------------------------------------------- /src/examples/example6b.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example 6b: Optimizing rotation of a teapot with the soft mesh renderer. 
3 | 4 | This example converges for small perturbations in rotation but not larger perturbations 5 | using the barycentric-differentiable renderer. The soft renderer is able to robustly 6 | optimize larger perturbations than the barycentric renderer. 7 | """ 8 | 9 | import os 10 | import argparse 11 | 12 | import torch 13 | import numpy as np 14 | from skimage import io 15 | import imageio 16 | import matplotlib.pyplot as plt 17 | 18 | from .. import soft_mesh_renderer as smr 19 | from ..common import obj_utils 20 | from ..common import camera_utils 21 | 22 | current_dir = os.path.dirname(os.path.realpath(__file__)) 23 | data_dir = os.path.join(current_dir, '.') 24 | 25 | if __name__ == "__main__": 26 | parser = argparse.ArgumentParser() 27 | parser.add_argument('-i', '--filename_input', type=str, default=os.path.join(data_dir, 'teapot.obj')) 28 | parser.add_argument('-t', '--filename_target', type=str, default=os.path.join(data_dir, 'example6b_target.png')) 29 | parser.add_argument('-o', '--filename_output', type=str, default=os.path.join(data_dir, 'example6b.mp4')) 30 | args = parser.parse_args() 31 | 32 | image_width = 100 33 | image_height = 100 34 | 35 | # load obj file 36 | vertices, triangles, _ = obj_utils.load_obj(args.filename_input) 37 | vertices = vertices[None,:,:] # [num_vertices, 3] -> [batch_size=1, num_vertices, 3] 38 | # TODO why are triangles not batched? 39 | 40 | # camera position: 41 | eye = torch.tensor([[0.0, 3.0, 3.0]], dtype=torch.float32) 42 | center = torch.tensor([[0.0, 0.0, 0.0]], dtype=torch.float32) 43 | world_up = torch.tensor([0.0, np.cos(-np.pi/4.), np.sin(-np.pi/4.)], dtype=torch.float32) 44 | 45 | vertex_diffuse_colors = torch.ones_like(vertices, dtype=torch.float32) 46 | light_positions = torch.tensor([[[0.0, 3.0, 0.0]]], dtype=torch.float32) 47 | light_intensities = torch.ones([1, 1], dtype=torch.float32) 48 | 49 | initial_euler_angles = [[np.pi/4., 0., 0.]] 50 | euler_angles = torch.tensor(initial_euler_angles, requires_grad=True) 51 | 52 | def render_with_rotation(input_euler_angles): 53 | model_rotation = camera_utils.euler_matrices(input_euler_angles)[0, :3, :3] # [3, 3] 54 | vertices_world_space = torch.matmul(vertices, model_rotation.T) 55 | 56 | render = smr.render( 57 | vertices_world_space, 58 | triangles, 59 | vertex_diffuse_colors, 60 | eye, 61 | center, 62 | world_up, 63 | light_positions, 64 | light_intensities, 65 | image_width, 66 | image_height 67 | ) 68 | render = torch.reshape(render, [image_height, image_width, 4]) 69 | return render 70 | 71 | """ 72 | Target was generated with: 73 | ``` 74 | target_euler_angles = torch.tensor([[0.0, 0.0, 0.0]]) 75 | ``` 76 | """ 77 | target_render = torch.tensor( 78 | io.imread(args.filename_target).astype(float) / 255.0 79 | ) # [image_width, image_height, 4] 80 | 81 | writer = imageio.get_writer(args.filename_output, fps=20) 82 | optimizer = torch.optim.SGD([euler_angles], 0.7, 0.1) 83 | def stepfn(): 84 | optimizer.zero_grad() 85 | render = render_with_rotation(euler_angles) 86 | 87 | # write to GIF output 88 | frame = render.detach().numpy() # [image_height, image_width, 4] 89 | # black background 90 | frame = np.concatenate([ 91 | frame[:,:,:3]*frame[:,:,3][:,:,None], 92 | np.ones([image_height, image_width, 1], dtype=np.float32) 93 | ], axis=-1) 94 | writer.append_data((255*frame).astype(np.uint8)) 95 | 96 | loss = torch.mean(torch.abs(render - target_render)) 97 | loss.backward() 98 | torch.nn.utils.clip_grad_norm_([euler_angles], 1.0) 99 | return loss 100 | 101 | epochs = 50 102 | 
loss_points = [] 103 | for e in range(epochs): 104 | print("step {} of {}".format(e, epochs)) 105 | loss = optimizer.step(stepfn) 106 | loss_points.append(float(loss)) 107 | 108 | writer.close() 109 | 110 | x = np.arange(0, epochs, 1) 111 | y = np.array(loss_points) 112 | plt.plot(x, y) 113 | plt.show() 114 | -------------------------------------------------------------------------------- /src/examples/example6b_target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/examples/example6b_target.png -------------------------------------------------------------------------------- /src/examples/example7b.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example 7: Fitting sphere vertices to a cow. 3 | """ 4 | 5 | import os 6 | import argparse 7 | 8 | import torch 9 | import numpy as np 10 | from skimage import io 11 | import imageio 12 | import matplotlib.pyplot as plt 13 | 14 | from .. import soft_mesh_renderer as smr 15 | from ..common import shapes, obj_utils 16 | 17 | current_dir = os.path.dirname(os.path.realpath(__file__)) 18 | data_dir = os.path.join(current_dir, '.') 19 | 20 | # From PyTorch3D: 21 | # https://github.com/facebookresearch/pytorch3d/blob/main/pytorch3d/ops/laplacian_matrices.py 22 | # 23 | # Note the laplacian depends only on the topology of a mesh and can be 24 | # considered constant if the topology is fixed. 25 | def compute_laplacian(verts: torch.Tensor, edges: torch.Tensor) -> torch.Tensor: 26 | """ 27 | Computes the laplacian matrix. 28 | The definition of the laplacian is 29 | L[i, j] = -1 , if i == j 30 | L[i, j] = 1 / deg(i) , if (i, j) is an edge 31 | L[i, j] = 0 , otherwise 32 | where deg(i) is the degree of the i-th vertex in the graph. 33 | 34 | Args: 35 | verts: tensor of shape (V, 3) containing the vertices of the graph 36 | edges: tensor of shape (E, 2) containing the vertex indices of each edge 37 | Returns: 38 | L: Sparse FloatTensor of shape (V, V) 39 | """ 40 | edges = edges.long() 41 | V = verts.shape[0] 42 | 43 | e0, e1 = edges.unbind(1) 44 | 45 | idx01 = torch.stack([e0, e1], dim=1) # (E, 2) 46 | idx10 = torch.stack([e1, e0], dim=1) # (E, 2) 47 | idx = torch.cat([idx01, idx10], dim=0).t() # (2, 2*E) 48 | 49 | # First, we construct the adjacency matrix, 50 | # i.e. A[i, j] = 1 if (i,j) is an edge, or 51 | # A[e0, e1] = 1 & A[e1, e0] = 1 52 | ones = torch.ones(idx.shape[1], dtype=torch.float32, device=verts.device) 53 | # pyre-fixme[16]: Module `sparse` has no attribute `FloatTensor`. 54 | A = torch.sparse.FloatTensor(idx, ones, (V, V)) 55 | 56 | # the sum of i-th row of A gives the degree of the i-th vertex 57 | deg = torch.sparse.sum(A, dim=1).to_dense() 58 | 59 | # We construct the Laplacian matrix by adding the non diagonal values 60 | # i.e. L[i, j] = 1 ./ deg(i) if (i, j) is an edge 61 | deg0 = deg[e0] 62 | # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`. 63 | deg0 = torch.where(deg0 > 0.0, 1.0 / deg0, deg0) 64 | deg1 = deg[e1] 65 | # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`. 66 | deg1 = torch.where(deg1 > 0.0, 1.0 / deg1, deg1) 67 | val = torch.cat([deg0, deg1]) 68 | # pyre-fixme[16]: Module `sparse` has no attribute `FloatTensor`. 69 | L = torch.sparse.FloatTensor(idx, val, (V, V)) 70 | 71 | # Then we add the diagonal values L[i, i] = -1. 
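    # For example, a single triangle (V = 3, every vertex pair connected) has
    # deg(i) = 2 for all i, so the finished matrix is
    #   [[-1.0,  0.5,  0.5],
    #    [ 0.5, -1.0,  0.5],
    #    [ 0.5,  0.5, -1.0]]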
72 | idx = torch.arange(V, device=verts.device) 73 | idx = torch.stack([idx, idx], dim=0) 74 | ones = torch.ones(idx.shape[1], dtype=torch.float32, device=verts.device) 75 | # pyre-fixme[16]: Module `sparse` has no attribute `FloatTensor`. 76 | L -= torch.sparse.FloatTensor(idx, ones, (V, V)) 77 | 78 | return L 79 | 80 | def compute_edges_list(vertices, faces): 81 | """ 82 | Computes the edges of a mesh from its vertices and faces. 83 | Args: 84 | vertices: tensor of shape (V, 3) containing the vertices of the mesh 85 | faces: tensor of shape (F, 3) containing the vertex indices of each face 86 | Returns: 87 | edges: tensor of shape (E, 2) containing the vertex indices of each edge 88 | """ 89 | faces = faces.to(vertices.device) 90 | # pyre-fixme[16]: Module `torch` has no attribute `cat`. 91 | edges = torch.cat( 92 | [ 93 | faces[:, :2], 94 | faces[:, 1:], 95 | faces[:, ::2], 96 | ] 97 | ) 98 | edges = edges.view(-1, 2) 99 | edges = torch.unique(edges, dim=0) 100 | return edges 101 | 102 | def mesh_laplacian_smoothing_loss(vertices, laplacian): 103 | """ 104 | Computes the uniform weight laplacian smoothing objective for a single mesh (unbatched). 105 | Args: 106 | vertices: tensor of shape (V, 3) containing the vertices of the mesh 107 | laplacian: tensor of shape (V, V) containing the laplacian matrix of the mesh 108 | Returns: 109 | loss: the laplacian smoothing loss 110 | """ 111 | weight = 1.0 / (vertices.shape[0]) 112 | loss = laplacian.mm(vertices) 113 | loss = loss.norm(dim=1) 114 | loss = loss * weight 115 | return loss.sum() 116 | 117 | def mesh_edge_loss(vertices, edges): 118 | """ 119 | Computes the edge length loss for a single mesh (unbatched). 120 | Args: 121 | vertices: tensor of shape (V, 3) containing the vertices of the mesh 122 | edges: tensor of shape (E, 2) containing the vertex indices of each edge 123 | Returns: 124 | loss: the edge length loss 125 | """ 126 | v0 = vertices[edges[:, 0]] 127 | v1 = vertices[edges[:, 1]] 128 | loss = (v0 - v1).norm(dim=1, p=2) 129 | return loss.mean() 130 | 131 | if __name__ == "__main__": 132 | parser = argparse.ArgumentParser() 133 | parser.add_argument('-t1', '--target_image1', type=str, default=os.path.join(data_dir, 'example7b_target1.png')) 134 | parser.add_argument('-t2', '--target_image2', type=str, default=os.path.join(data_dir, 'example7b_target2.png')) 135 | parser.add_argument('-t3', '--target_image3', type=str, default=os.path.join(data_dir, 'example7b_target3.png')) 136 | parser.add_argument('-t4', '--target_image4', type=str, default=os.path.join(data_dir, 'example7b_target4.png')) 137 | parser.add_argument('-o', '--output_model', type=str, default=os.path.join(data_dir, 'example7b.obj')) 138 | parser.add_argument('-v', '--output_video', type=str, default=os.path.join(data_dir, 'example7b.mp4')) 139 | parser.add_argument('-p', '--output_previews_dir', type=str, default=os.path.join(data_dir, 'example7b_previews')) 140 | args = parser.parse_args() 141 | 142 | # load obj file 143 | sphere_resolution = 20 144 | vertices, triangles, _ = shapes.sphere(1., resolution=sphere_resolution) 145 | edges = compute_edges_list(vertices, triangles) 146 | laplacian = compute_laplacian(vertices, edges) 147 | 148 | vertices.requires_grad = True 149 | 150 | # camera positions: 151 | eye = torch.tensor([ 152 | [0.0, 0.0, -3.0], 153 | [3.0, 0.0, 0.0], 154 | [-3.0, 0.0, 0.0], 155 | [0.0, 0.0, 3.0], 156 | ], dtype=torch.float32) 157 | center = torch.zeros_like(eye) 158 | world_up = torch.tensor([ 159 | [0.0, 1.0, 0.0], 160 | [0.0, 1.0, 0.0], 
161 |         [0.0, 1.0, 0.0],
162 |         [0.0, 1.0, 0.0],
163 |     ], dtype=torch.float32)
164 |
165 |     light_positions = torch.tensor([
166 |         [
167 |             [0.0, 0.0, -3.0],
168 |             [0.0, 3.0, 0.0],
169 |             [0.0, 0.0, 3.0],
170 |         ],
171 |         [
172 |             [0.0, 0.0, -3.0],
173 |             [0.0, 3.0, 0.0],
174 |             [0.0, 0.0, 3.0],
175 |         ],
176 |         [
177 |             [0.0, 0.0, -3.0],
178 |             [0.0, 3.0, 0.0],
179 |             [0.0, 0.0, 3.0],
180 |         ],
181 |         [
182 |             [0.0, 0.0, -3.0],
183 |             [0.0, 3.0, 0.0],
184 |             [0.0, 0.0, 3.0],
185 |         ],
186 |     ], dtype=torch.float32)
187 |     light_intensities = torch.ones([4, 3], dtype=torch.float32)
188 |
189 |     # Create a diffuse colors tensor coloring all vertices white
190 |     vertex_diffuse_colors = torch.ones([4, vertices.shape[0], 3], dtype=torch.float32)
191 |
192 |     image_width = 96
193 |     image_height = 96
194 |
195 |     target_render1 = torch.tensor(
196 |         io.imread(args.target_image1).astype(float) / 255.0
197 |     ) # [image_height, image_width, 4]
198 |     target_render2 = torch.tensor(
199 |         io.imread(args.target_image2).astype(float) / 255.0
200 |     ) # [image_height, image_width, 4]
201 |     target_render3 = torch.tensor(
202 |         io.imread(args.target_image3).astype(float) / 255.0
203 |     ) # [image_height, image_width, 4]
204 |     target_render4 = torch.tensor(
205 |         io.imread(args.target_image4).astype(float) / 255.0
206 |     ) # [image_height, image_width, 4]
207 |     target_renders = torch.stack([target_render1, target_render2, target_render3, target_render4], dim=0)
208 |
209 |     epochs_between_frames = 10
210 |     epochs_between_previews = 100
211 |
212 |     writer = imageio.get_writer(args.output_video, fps=20 / epochs_between_frames)
213 |     sigma_val = 1e-4
214 |     blur_radius = 0.1
215 |     edge_loss_weight = 0.1
216 |     laplacian_loss_weight = 0.1
217 |     lr = 4.0
218 |     momentum = 0.1
219 |     optimizer = torch.optim.SGD([vertices], lr, momentum)
220 |     def stepfn(e):
221 |         optimizer.zero_grad()
222 |
223 |         # We need to re-create this tensor from `vertices` each run to
224 |         # ensure it gets changes from optimizer updates.
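        # (torch.stack builds a fresh graph node referencing the `vertices` leaf,
        # so the batched copies pick up the latest parameter values while
        # gradients still accumulate back into `vertices`.)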
225 |         batched_vertices = torch.stack([vertices]*4, dim=0)
226 |         batched_renders = smr.render(
227 |             batched_vertices,
228 |             triangles,
229 |             vertex_diffuse_colors,
230 |             eye,
231 |             center,
232 |             world_up,
233 |             light_positions,
234 |             light_intensities,
235 |             image_width,
236 |             image_height,
237 |             sigma_val=sigma_val,
238 |             fov_y=60.0,
239 |             blur_radius=blur_radius
240 |         )
241 |
242 |         loss = torch.mean((batched_renders[..., 3] - target_renders[..., 3])**2)
243 |         loss += mesh_edge_loss(vertices, edges) * edge_loss_weight
244 |         loss += mesh_laplacian_smoothing_loss(vertices, laplacian) * laplacian_loss_weight
245 |
246 |         loss.backward()
247 |         torch.nn.utils.clip_grad_norm_([vertices], 1.0)
248 |
249 |         render = torch.reshape(batched_renders[0], [image_height, image_width, 4])
250 |         if e % epochs_between_frames == 0:
251 |             # write to video output
252 |             frame = render.detach().numpy() # [image_height, image_width, 4]
253 |             # black background
254 |             frame = np.concatenate([
255 |                 frame[:,:,:3]*frame[:,:,3][:,:,None],
256 |                 np.ones([image_height, image_width, 1], dtype=np.float32)
257 |             ], axis=-1)
258 |             writer.append_data((255*frame).astype(np.uint8))
259 |
260 |             print("\nappended frame {} to video output\n".format(e // epochs_between_frames))
261 |         if e % epochs_between_previews == 0:
262 |             # write a preview image to the preview directory
263 |             preview_image_path = os.path.join(args.output_previews_dir, "preview_{:04d}.png".format(e))
264 |             preview_obj_path = os.path.join(args.output_previews_dir, "preview_{:04d}.obj".format(e))
265 |             result_image = render.detach().numpy()
266 |             # Binarize the alpha channel to 0 or 1. In the raw output of the soft renderer,
267 |             # it represents the probability that a triangle occupies the pixel. This will be
268 |             # less than 1.0 for any pixel which is not entirely covered by a triangle, even if
269 |             # the pixel is technically completely covered when considering all triangles. If we
270 |             # don't binarize the value, we will get seams in the output along triangle edges.
271 |             result_image[..., 3] = 1.0 * (result_image[..., 3] > 0.0)
272 |             result_image = np.clip(result_image, 0., 1.).copy(order="C")
273 |             io.imsave(preview_image_path, (result_image * 255.0).astype(np.uint8))
274 |
275 |             obj_utils.save_obj(preview_obj_path, vertices, triangles)
276 |
277 |             print("\nsaved previews to {} and {}\n".format(preview_image_path, preview_obj_path))
278 |
279 |         return loss
280 |
281 |     epochs = 1000
282 |     loss_points = []
283 |     for e in range(epochs):
284 |         print("\nstep {} of {}\n".format(e, epochs))
285 |         loss = optimizer.step(lambda: stepfn(e))
286 |         loss_points.append(float(loss))
287 |
288 |     writer.close()
289 |     obj_utils.save_obj(args.output_model, vertices, triangles)
290 |
291 |     x = np.arange(0, epochs, 1)
292 |     y = np.array(loss_points)
293 |     plt.plot(x, y)
294 |     plt.show()
295 |
296 |
--------------------------------------------------------------------------------
/src/examples/example7b_target1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/examples/example7b_target1.png
--------------------------------------------------------------------------------
/src/examples/example7b_target2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/examples/example7b_target2.png
--------------------------------------------------------------------------------
/src/examples/example7b_target3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/examples/example7b_target3.png
--------------------------------------------------------------------------------
/src/examples/example7b_target4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/examples/example7b_target4.png
--------------------------------------------------------------------------------
/src/mesh_renderer/README.md:
--------------------------------------------------------------------------------
1 | # mesh_renderer
2 |
3 | This package contains a differentiable, 3D mesh renderer using the barycentric formulation from Genova, Kyle, et al. "Unsupervised training for 3d morphable model regression." It is a port of Google's [tf_mesh_renderer](https://github.com/google/tf_mesh_renderer) to PyTorch.
4 |
5 | There is an optimized C++ implementation of this renderer available for use. To enable it, first install the kernel via `cd src/mesh_renderer/kernels && python setup.py install`, then change the hardcoded config variable `USE_CPP_RASTERIZER` as described in the Implementation notes section below.
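Concretely, that means running the following from the repository root:

```
cd src/mesh_renderer/kernels
python setup.py install
```

and then setting `USE_CPP_RASTERIZER = True` in `src/mesh_renderer/rasterize.py` so that the barycentric rasterizer dispatches to the compiled kernel.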
6 |
7 | # Testing
8 |
9 | To test the rasterizer module, run from the repository root:
10 | ```
11 | python -m src.mesh_renderer.rasterize_triangles_test
12 | ```
13 |
14 | To test the mesh renderer, run from the repository root:
15 |
16 | ```
17 | python -m src.mesh_renderer.mesh_renderer_test
18 | ```
19 |
20 | # Usage
21 |
22 | The mesh renderer provides a high-level API for rendering triangle meshes with shading and a low-level API for rasterizing batches of triangles. The APIs are mostly the same as those in [tf_mesh_renderer](https://github.com/google/tf_mesh_renderer) but adjusted for PyTorch.
23 |
24 | ## Rendering a shaded mesh
25 |
26 | ### `render`
27 |
28 | Rendering a shaded mesh can be done with the `render` function in `mesh_renderer/render.py` (exported as `mesh_renderer.render`). This function renders an input scene (mesh, lights, and camera) using Phong shading, and returns an output image.
29 |
30 | #### Args:
31 |
32 | - `vertices`: 3D float32 tensor with shape `[batch_size, vertex_count, 3]`. Each triplet is an xyz position in world space.
33 | - `triangles`: 2D int32 tensor with shape `[triangle_count, 3]`. Each triplet should contain vertex indices describing a triangle such that the triangle's normal points toward the viewer if the forward order of the triplet defines a clockwise winding of the vertices. Gradients with respect to this tensor are not available.
34 | - `normals`: 3D float32 tensor with shape `[batch_size, vertex_count, 3]`. Each triplet is the xyz vertex normal for its corresponding vertex. Each vector is assumed to be already normalized.
35 | - `diffuse_colors`: 3D float32 tensor with shape `[batch_size, vertex_count, 3]`. The RGB diffuse reflection in the range `[0, 1]` for each vertex.
36 | - `camera_position`: 2D tensor with shape `[batch_size, 3]` or 1D tensor with shape `[3]` specifying the XYZ world space camera position.
37 | - `camera_lookat`: 2D tensor with shape `[batch_size, 3]` or 1D tensor with shape `[3]` containing an XYZ point along the center of the camera's gaze.
38 | - `camera_up`: 2D tensor with shape `[batch_size, 3]` or 1D tensor with shape `[3]` containing the up direction for the camera. The camera will have no tilt with respect to this direction.
39 | - `light_positions`: a 3D tensor with shape `[batch_size, light_count, 3]`. The XYZ position of each light in the scene. In the same coordinate space as pixel_positions.
40 | - `light_intensities`: a 3D tensor with shape `[batch_size, light_count, 3]`. The RGB intensity values for each light. Intensities may be above 1.
41 | - `image_width`: int specifying desired output image width in pixels.
42 | - `image_height`: int specifying desired output image height in pixels.
43 | - `specular_colors`: (optional) 3D float32 tensor with shape `[batch_size, vertex_count, 3]`. The RGB specular reflection in the range `[0, 1]` for each vertex. If supplied, specular reflections will be computed, and both specular colors and shininess_coefficients are expected.
44 | - `shininess_coefficients`: (optional) a 0D-2D float32 tensor with maximum shape `[batch_size, vertex_count]`. The Phong shininess coefficient of each vertex. A 0D tensor or float gives a constant shininess coefficient for all vertices across all batches and images. A 1D tensor must have shape `[batch_size]`, and a single shininess coefficient per image is used.
45 | - `ambient_color`: (optional) a 2D tensor with shape `[batch_size, 3]`. The RGB ambient color, which is added to each pixel in the scene. If None, it is assumed to be black.
46 | - `fov_y`: (optional) float, 0D tensor, or 1D tensor with shape `[batch_size]` specifying desired output image y field of view in degrees.
47 | - `near_clip`: (optional) float, 0D tensor, or 1D tensor with shape `[batch_size]` specifying near clipping plane distance.
48 | - `far_clip`: (optional) float, 0D tensor, or 1D tensor with shape `[batch_size]` specifying far clipping plane distance.
49 |
50 |
51 | #### Returns:
52 |
53 | A 4D float32 tensor of shape `[batch_size, image_height, image_width, 4]` containing the lit RGBA color values for each image at each pixel. RGB colors are the intensity values before tonemapping and can be in the range `[0, infinity]`. Clipping to the range `[0, 1]` with `np.clip` is likely reasonable for both viewing and training most scenes. More complex scenes with multiple lights should tone map color values for display only. One simple tonemapping approach is to rescale color values as x/(1+x); gamma compression is another common technique. Alpha values are zero for background pixels and near one for mesh pixels.
54 |
55 | ### Example
56 |
57 | An example usage of the differentiable mesh renderer to render a cube, then optimize its rotation to match a target image can be seen in the `testThatCubeRotates` test case in `mesh_renderer_test.py`.
58 |
59 | ## Rasterizing triangles with arbitrary attributes
60 |
61 | ### `rasterize`
62 |
63 | This is a lower-level function which can be used to rasterize a batch of triangles into a tensor providing interpolated vertex attributes in each pixel. This could be useful if you want to build your own shading on top of the core rasterization module, for example.
64 |
65 | #### Args:
66 |
67 | - `world_space_vertices`: 3D float32 tensor of xyz positions with shape `[batch_size, vertex_count, 3]`.
68 | - `attributes`: 3D float32 tensor with shape `[batch_size, vertex_count, attribute_count]`. Each vertex attribute is interpolated across the triangle using barycentric interpolation.
69 | - `triangles`: 2D int32 tensor with shape `[triangle_count, 3]`. Each triplet should contain vertex indices describing a triangle such that the triangle's normal points toward the viewer if the forward order of the triplet defines a clockwise winding of the vertices. Gradients with respect to this tensor are not available.
70 | - `camera_matrices`: 3D float tensor with shape `[batch_size, 4, 4]` containing model-view-perspective projection matrices.
71 | - `image_width`: int specifying desired output image width in pixels.
72 | - `image_height`: int specifying desired output image height in pixels.
73 | - `background_value`: a 1D float32 tensor with shape `[attribute_count]`. Pixels that lie outside all triangles take this value.
74 |
75 | #### Returns:
76 |
77 | - A 4D float32 tensor with shape `[batch_size, image_height, image_width, attribute_count]`, containing the interpolated vertex attributes at each pixel.
78 |
79 | ### Example
80 |
81 | An example usage of the `rasterize` API to rasterize a cube can be found in the `testRendersTwoCubesInBatch` test case in `rasterize_triangles_test.py`.
82 |
83 | ## `camera_utils`
84 |
85 | This file contains some utilities that may be useful for transforming the input scene before rendering. The `render` function uses some of these functions internally to project the world-space vertices into camera-space. Model-view-perspective projection matrices are also required as input to the lower-level rasterization APIs.
86 |
87 | ### `euler_matrices`
88 |
89 | You can use this to create a Model matrix with rotation to transform a set of object-space vertices into world space before rendering it.
90 |
91 | #### Args:
92 |
93 | - `angles`: a `[batch_size, 3]` tensor containing X, Y, and Z angles in radians.
94 |
95 | #### Returns:
96 |
97 | - A `[batch_size, 4, 4]` tensor of matrices.
98 |
99 | ### `look_at`
100 |
101 | You can use this to compute a View matrix to transform a set of world-space vertices into eye space; this is primarily useful for the lower-level rasterization APIs which require an input View matrix.
102 |
103 | #### Args:
104 |
105 | - `eye`: 2D float32 tensor with shape `[batch_size, 3]` containing the XYZ world space position of the camera.
106 | - `center`: 2D float32 tensor with shape `[batch_size, 3]` containing a position along the center of the camera's gaze line.
107 | - `world_up`: 2D float32 tensor with shape `[batch_size, 3]` specifying the world's up direction; the output camera will have no tilt with respect to this direction.
108 |
109 | #### Returns:
110 |
111 | - A `[batch_size, 4, 4]` float tensor containing a right-handed camera extrinsics matrix that maps points from world space to points in eye space.
112 |
113 | # Implementation notes
114 |
115 |
116 | ## Rasterizer
117 |
118 | There are two implementations of the low-level `rasterize` API.
119 |
120 | ### C++ kernel
121 |
122 | This implementation is written in C++ for performance. Since it doesn't use PyTorch built-in functions under the hood and instead [extends `torch.autograd.Function`](https://pytorch.org/docs/stable/notes/extending.html#extending-autograd), the backward pass is explicitly written rather than just being implicit in the forward pass. Both are written in the [C++ extension](https://pytorch.org/tutorials/advanced/cpp_extension.html) in `src/mesh_renderer/kernels/rasterize_triangles.cpp`, with the wrapper code in `src/mesh_renderer/rasterize_triangles_ext.py`.
123 |
124 | This implementation is enabled by setting the hard-coded global variable `USE_CPP_RASTERIZER = True` in `src/mesh_renderer/rasterize.py`.
125 |
126 | ### Python-only kernel
127 |
128 | This implementation is written in Python only in `src/mesh_renderer/rasterize_triangles_python.py` and leverages PyTorch built-in functions for autograd. It's much shorter than the C++ kernel and is intended to be simpler to understand. However, performance is much worse.
129 |
130 | This implementation is enabled by setting the hard-coded global variable `USE_CPP_RASTERIZER = False` in `src/mesh_renderer/rasterize.py`. This is the default.
--------------------------------------------------------------------------------
/src/mesh_renderer/__init__.py:
--------------------------------------------------------------------------------
1 | from .render import render, tone_mapper
2 | from .rasterize import rasterize
3 |
4 | __version__ = '0.0.1'
5 | name = 'mesh_renderer'
--------------------------------------------------------------------------------
/src/mesh_renderer/kernels/rasterize_triangles.cpp:
--------------------------------------------------------------------------------
1 | #include <algorithm>
2 | #include <cmath>
3 | #include <vector>
4 |
5 | #include <torch/extension.h>
6 |
7 | namespace {
8 |
9 | // Threshold for a barycentric coordinate triplet's sum, below which the
10 | // coordinates at a pixel are deemed degenerate. Most such degenerate
11 | // triplets in an image will be exactly zero, as this is how pixels outside
12 | // the mesh are rendered.
13 | constexpr float kDegenerateBarycentricCoordinatesCutoff = 0.9f;
14 |
15 | }
16 |
17 | // Takes the maximum of a, b, and c, rounds up, and converts to an integer
18 | // in the range [low, high].
19 | inline int clamped_integer_max(float a, float b, float c, int low, int high) {
20 |   return std::min(
21 |       std::max(static_cast<int>(std::ceil(std::max(std::max(a, b), c))), low),
22 |       high);
23 | }
24 |
25 | // Takes the minimum of a, b, and c, rounds down, and converts to an integer
26 | // in the range [low, high].
27 | inline int clamped_integer_min(float a, float b, float c, int low, int high) {
28 |   return std::min(
29 |       std::max(static_cast<int>(std::floor(std::min(std::min(a, b), c))), low),
30 |       high);
31 | }
32 |
33 | // Compute the edge functions from M^-1 as described by Olano and Greer,
34 | // "Triangle Scan Conversion using 2D Homogeneous Coordinates."
35 | //
36 | // This function combines equations (3) and (4). It first computes
37 | // [a b c] = u_i * M^-1, where u_0 = [1 0 0], u_1 = [0 1 0], etc.,
38 | // then computes edge_i = aX + bY + c.
39 | void compute_edge_functions(const float px, const float py,
40 |                             const float m_inv[9], float values[3]) {
41 |   for (int i = 0; i < 3; ++i) {
42 |     const float a = m_inv[3 * i + 0];
43 |     const float b = m_inv[3 * i + 1];
44 |     const float c = m_inv[3 * i + 2];
45 |
46 |     values[i] = a * px + b * py + c;
47 |   }
48 | }
49 |
50 | // Compute a 3x3 matrix inverse without dividing by the determinant.
51 | // Instead, makes an unnormalized matrix inverse with the correct sign
52 | // by flipping the sign of the matrix if the determinant is negative.
53 | // By leaving out determinant division, the rows of M^-1 only depend on two out
54 | // of three of the columns of M; i.e., the first row of M^-1 only depends on the
55 | // second and third columns of M, the second only depends on the first and
56 | // third, etc. This means we can compute edge functions for two neighboring
57 | // triangles independently and produce exactly the same numerical result up
58 | // to the sign. This in turn means we can avoid cracks in rasterization without
59 | // using fixed-point arithmetic.
60 | // See http://mathworld.wolfram.com/MatrixInverse.html
61 | float compute_unnormalized_matrix_inverse(
62 |     const float a11, const float a12, const float a13,
63 |     const float a21, const float a22, const float a23,
64 |     const float a31, const float a32, const float a33, float m_inv[9]) {
65 |   m_inv[0] = a22 * a33 - a32 * a23;
66 |   m_inv[1] = a13 * a32 - a33 * a12;
67 |   m_inv[2] = a12 * a23 - a22 * a13;
68 |   m_inv[3] = a23 * a31 - a33 * a21;
69 |   m_inv[4] = a11 * a33 - a31 * a13;
70 |   m_inv[5] = a13 * a21 - a23 * a11;
71 |   m_inv[6] = a21 * a32 - a31 * a22;
72 |   m_inv[7] = a12 * a31 - a32 * a11;
73 |   m_inv[8] = a11 * a22 - a21 * a12;
74 |
75 |   // The first column of the unnormalized M^-1 contains intermediate values for
76 |   // det(M).
77 |   const float det = a11 * m_inv[0] + a12 * m_inv[3] + a13 * m_inv[6];
78 |
79 |   // Transfer the sign of the determinant.
80 |   if (det < 0.0f) {
81 |     for (int i = 0; i < 9; ++i) {
82 |       m_inv[i] = -m_inv[i];
83 |     }
84 |   }
85 |
86 |   return det;
87 | }
88 |
89 | // Determine whether the point p lies inside a front-facing triangle.
90 | // Count pixels exactly on an edge as inside the triangle, as long as the
91 | // triangle is not degenerate. Degenerate (zero-area) triangles always fail
92 | // the inside test.
93 | bool pixel_is_inside_triangle(const float edge_values[3]) {
94 |   // Check that the edge values are all non-negative and that at least one is
95 |   // positive (triangle is non-degenerate).
96 |   return (edge_values[0] >= 0 && edge_values[1] >= 0 && edge_values[2] >= 0) &&
97 |          (edge_values[0] > 0 || edge_values[1] > 0 || edge_values[2] > 0);
98 | }
99 |
100 | // Compute df_dvertices, the derivative of a scalar loss function with respect
101 | // to the vector of stacked vertex coordinates in XYZW clip space.
102 | //
103 | // Params:
104 | // df_dbarycentric_coords: A 3D float32 tensor with shape
105 | //   {image_height, image_width, 3}. The element at index [y, x, b] gives the
106 | //   partial derivative of the scalar loss function with respect to the bth
107 | //   barycentric coordinate of pixel coordinate (y, x).
108 | // vertices: A 2D float32 tensor with shape {vertex_count, 4}.
109 | //   Each quadruplet is the XYZW location of the vertex with that
110 | //   quadruplet's id. The coordinates are assumed to be OpenGL-style clip-space
111 | //   (i.e., post-projection, pre-divide), where X points right, Y points up,
112 | //   Z points away. Note Z here is the clip-space (z-buffer) depth and W is the
113 | //   world space depth.
114 | // triangles: A 2D int32 tensor with shape {triangle_count, 3}.
115 | //   Each triplet is the three vertex ids indexing into vertices
116 | //   describing one triangle with clockwise winding.
117 | // px_triangle_ids: A 2D tensor with shape {image_height, image_width}.
118 | //   Each pixel contains a triangle id in the range
119 | //   [0, triangle_count). The id value is also 0 if there is no triangle
120 | //   at the pixel; the px_barycentric_coordinates must be checked to distinguish
121 | //   between the two cases.
122 | // px_barycentric_coordinates: A 3D tensor with
123 | //   shape {image_height, image_width, 3}. Contains the triplet of
124 | //   barycentric coordinates at each pixel in the same vertex ordering as
125 | //   triangles. If no triangle is present, all coordinates are 0.
126 | //
127 | // Returns:
128 | // df_dvertices: A 2D tensor with shape {vertex_count, 4} giving the derivative
129 | //   of the scalar loss function f with respect to the vector of stacked vertex
130 | //   coordinates in XYZW clip space.
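// A sketch of the derivation: the normalized coordinates computed in the
// forward pass are b_i = e_i / (e_0 + e_1 + e_2), where e = M^-1 p. Applying
// the quotient rule to that expression with respect to the entries of M
// produces the two kinds of terms accumulated below: a -M^-1 term from the
// numerator and a d(e_0 + e_1 + e_2)/dM * b_i * b_j term from the
// denominator, with everything normalized by abs(det M) at the end.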
131 | std::vector<torch::Tensor> rasterize_triangles_backward(
132 |     const torch::Tensor &df_dbarycentric_coords,
133 |     const torch::Tensor &vertices,
134 |     const torch::Tensor &triangles,
135 |     const torch::Tensor &px_triangle_ids,
136 |     const torch::Tensor &px_barycentric_coords
137 | ) {
138 |   const int triangle_count = (int) triangles.size(0);
139 |   const int vertex_count = (int) vertices.size(0);
140 |   const int image_height = px_triangle_ids.size(0);
141 |   const int image_width = px_triangle_ids.size(1);
142 |   float unnormalized_matrix_inverse[9];
143 |
144 |   auto df_dvertices = torch::zeros(
145 |       {vertex_count, 4},
146 |       torch::dtype(torch::kFloat32));
147 |
148 |   auto df_dbarycentric_coords_a = df_dbarycentric_coords.accessor<float, 3>();
149 |   auto vertices_a = vertices.accessor<float, 2>();
150 |   auto triangles_a = triangles.accessor<int, 2>();
151 |   auto px_triangle_ids_a = px_triangle_ids.accessor<int, 2>();
152 |   auto px_barycentric_coords_a =
153 |       px_barycentric_coords.accessor<float, 3>();
154 |   auto df_dvertices_a = df_dvertices.accessor<float, 2>();
155 |
156 |   for (int iy = 0; iy < image_height; ++iy) {
157 |     for (int ix = 0; ix < image_width; ++ix) {
158 |       int triangle_id = px_triangle_ids_a[iy][ix];
159 |       const float b0 = px_barycentric_coords_a[iy][ix][0];
160 |       const float b1 = px_barycentric_coords_a[iy][ix][1];
161 |       const float b2 = px_barycentric_coords_a[iy][ix][2];
162 |       if (triangle_id == 0 && b0 + b1 + b2 < kDegenerateBarycentricCoordinatesCutoff) {
163 |         continue;
164 |       }
165 |
166 |       const int v0_id = triangles_a[triangle_id][0];
167 |       const int v1_id = triangles_a[triangle_id][1];
168 |       const int v2_id = triangles_a[triangle_id][2];
169 |
170 |       const float v0x = vertices_a[v0_id][0];
171 |       const float v0y = vertices_a[v0_id][1];
172 |       const float v0w = vertices_a[v0_id][3];
173 |       const float v1x = vertices_a[v1_id][0];
174 |       const float v1y = vertices_a[v1_id][1];
175 |       const float v1w = vertices_a[v1_id][3];
176 |       const float v2x = vertices_a[v2_id][0];
177 |       const float v2y = vertices_a[v2_id][1];
178 |       const float v2w = vertices_a[v2_id][3];
179 |
180 |       const float abs_det = std::abs(
181 |           compute_unnormalized_matrix_inverse(
182 |               v0x, v1x, v2x,
183 |               v0y, v1y, v2y,
184 |               v0w, v1w, v2w,
185 |               unnormalized_matrix_inverse));
186 |
187 |       const float m_inv_d_dx = (
188 |           unnormalized_matrix_inverse[0] +
189 |           unnormalized_matrix_inverse[3] +
190 |           unnormalized_matrix_inverse[6]);
191 |       const float m_inv_d_dy = (
192 |           unnormalized_matrix_inverse[1] +
193 |           unnormalized_matrix_inverse[4] +
194 |           unnormalized_matrix_inverse[7]);
195 |       const float m_inv_d_dw = (
196 |           unnormalized_matrix_inverse[2] +
197 |           unnormalized_matrix_inverse[5] +
198 |           unnormalized_matrix_inverse[8]);
199 |
200 |       // All of the below derivatives need to be normalized by abs_det.
201 | 202 | const float db0_dx0 = (-unnormalized_matrix_inverse[0]) * b0 + m_inv_d_dx * b0 * b0; 203 | const float db0_dx1 = (-unnormalized_matrix_inverse[0]) * b1 + m_inv_d_dx * b0 * b1; 204 | const float db0_dx2 = (-unnormalized_matrix_inverse[0]) * b2 + m_inv_d_dx * b0 * b2; 205 | const float db0_dy0 = (-unnormalized_matrix_inverse[1]) * b0 + m_inv_d_dy * b0 * b0; 206 | const float db0_dy1 = (-unnormalized_matrix_inverse[1]) * b1 + m_inv_d_dy * b0 * b1; 207 | const float db0_dy2 = (-unnormalized_matrix_inverse[1]) * b2 + m_inv_d_dy * b0 * b2; 208 | const float db0_dw0 = (-unnormalized_matrix_inverse[2]) * b0 + m_inv_d_dw * b0 * b0; 209 | const float db0_dw1 = (-unnormalized_matrix_inverse[2]) * b1 + m_inv_d_dw * b0 * b1; 210 | const float db0_dw2 = (-unnormalized_matrix_inverse[2]) * b2 + m_inv_d_dw * b0 * b2; 211 | 212 | const float db1_dx0 = (-unnormalized_matrix_inverse[3]) * b0 + m_inv_d_dx * b1 * b0; 213 | const float db1_dx1 = (-unnormalized_matrix_inverse[3]) * b1 + m_inv_d_dx * b1 * b1; 214 | const float db1_dx2 = (-unnormalized_matrix_inverse[3]) * b2 + m_inv_d_dx * b1 * b2; 215 | const float db1_dy0 = (-unnormalized_matrix_inverse[4]) * b0 + m_inv_d_dy * b1 * b0; 216 | const float db1_dy1 = (-unnormalized_matrix_inverse[4]) * b1 + m_inv_d_dy * b1 * b1; 217 | const float db1_dy2 = (-unnormalized_matrix_inverse[4]) * b2 + m_inv_d_dy * b1 * b2; 218 | const float db1_dw0 = (-unnormalized_matrix_inverse[5]) * b0 + m_inv_d_dw * b1 * b0; 219 | const float db1_dw1 = (-unnormalized_matrix_inverse[5]) * b1 + m_inv_d_dw * b1 * b1; 220 | const float db1_dw2 = (-unnormalized_matrix_inverse[5]) * b2 + m_inv_d_dw * b1 * b2; 221 | 222 | const float db2_dx0 = (-unnormalized_matrix_inverse[6]) * b0 + m_inv_d_dx * b2 * b0; 223 | const float db2_dx1 = (-unnormalized_matrix_inverse[6]) * b1 + m_inv_d_dx * b2 * b1; 224 | const float db2_dx2 = (-unnormalized_matrix_inverse[6]) * b2 + m_inv_d_dx * b2 * b2; 225 | const float db2_dy0 = (-unnormalized_matrix_inverse[7]) * b0 + m_inv_d_dy * b2 * b0; 226 | const float db2_dy1 = (-unnormalized_matrix_inverse[7]) * b1 + m_inv_d_dy * b2 * b1; 227 | const float db2_dy2 = (-unnormalized_matrix_inverse[7]) * b2 + m_inv_d_dy * b2 * b2; 228 | const float db2_dw0 = (-unnormalized_matrix_inverse[8]) * b0 + m_inv_d_dw * b2 * b0; 229 | const float db2_dw1 = (-unnormalized_matrix_inverse[8]) * b1 + m_inv_d_dw * b2 * b1; 230 | const float db2_dw2 = (-unnormalized_matrix_inverse[8]) * b2 + m_inv_d_dw * b2 * b2; 231 | 232 | df_dvertices_a[v0_id][0] += ( 233 | df_dbarycentric_coords_a[iy][ix][0] * db0_dx0 + 234 | df_dbarycentric_coords_a[iy][ix][1] * db1_dx0 + 235 | df_dbarycentric_coords_a[iy][ix][2] * db2_dx0) / abs_det; 236 | df_dvertices_a[v0_id][1] += ( 237 | df_dbarycentric_coords_a[iy][ix][0] * db0_dy0 + 238 | df_dbarycentric_coords_a[iy][ix][1] * db1_dy0 + 239 | df_dbarycentric_coords_a[iy][ix][2] * db2_dy0) / abs_det; 240 | df_dvertices_a[v0_id][3] += ( 241 | df_dbarycentric_coords_a[iy][ix][0] * db0_dw0 + 242 | df_dbarycentric_coords_a[iy][ix][1] * db1_dw0 + 243 | df_dbarycentric_coords_a[iy][ix][2] * db2_dw0) / abs_det; 244 | 245 | df_dvertices_a[v1_id][0] += ( 246 | df_dbarycentric_coords_a[iy][ix][0] * db0_dx1 + 247 | df_dbarycentric_coords_a[iy][ix][1] * db1_dx1 + 248 | df_dbarycentric_coords_a[iy][ix][2] * db2_dx1) / abs_det; 249 | df_dvertices_a[v1_id][1] += ( 250 | df_dbarycentric_coords_a[iy][ix][0] * db0_dy1 + 251 | df_dbarycentric_coords_a[iy][ix][1] * db1_dy1 + 252 | df_dbarycentric_coords_a[iy][ix][2] * db2_dy1) / abs_det; 253 | 
df_dvertices_a[v1_id][3] += (
254 |           df_dbarycentric_coords_a[iy][ix][0] * db0_dw1 +
255 |           df_dbarycentric_coords_a[iy][ix][1] * db1_dw1 +
256 |           df_dbarycentric_coords_a[iy][ix][2] * db2_dw1) / abs_det;
257 |
258 |       df_dvertices_a[v2_id][0] += (
259 |           df_dbarycentric_coords_a[iy][ix][0] * db0_dx2 +
260 |           df_dbarycentric_coords_a[iy][ix][1] * db1_dx2 +
261 |           df_dbarycentric_coords_a[iy][ix][2] * db2_dx2) / abs_det;
262 |       df_dvertices_a[v2_id][1] += (
263 |           df_dbarycentric_coords_a[iy][ix][0] * db0_dy2 +
264 |           df_dbarycentric_coords_a[iy][ix][1] * db1_dy2 +
265 |           df_dbarycentric_coords_a[iy][ix][2] * db2_dy2) / abs_det;
266 |       df_dvertices_a[v2_id][3] += (
267 |           df_dbarycentric_coords_a[iy][ix][0] * db0_dw2 +
268 |           df_dbarycentric_coords_a[iy][ix][1] * db1_dw2 +
269 |           df_dbarycentric_coords_a[iy][ix][2] * db2_dw2) / abs_det;
270 |     }
271 |   }
272 |   return { df_dvertices };
273 | }
274 |
275 | // Compute the triangle id, barycentric coordinates, and z-buffer at each pixel
276 | // in the image.
277 | //
278 | // Params:
279 | // vertices: A 2D float32 tensor with shape {vertex_count, 4}.
280 | //   Each quadruplet is the XYZW location of the vertex with that
281 | //   quadruplet's id. The coordinates are assumed to be OpenGL-style clip-space
282 | //   (i.e., post-projection, pre-divide), where X points right, Y points up,
283 | //   Z points away. Note Z here is the clip-space (z-buffer) depth and W is the
284 | //   world space depth.
285 | // triangles: A 2D int32 tensor with shape {triangle_count, 3}.
286 | //   Each triplet is the three vertex ids indexing into vertices
287 | //   describing one triangle with clockwise winding.
288 | //
289 | // Returns:
290 | // px_triangle_ids: A 2D tensor with shape {image_height, image_width}.
291 | //   At return, each pixel contains a triangle id in the range
292 | //   [0, triangle_count). The id value is also 0 if there is no triangle
293 | //   at the pixel. The px_barycentric_coordinates must be checked to distinguish
294 | //   between the two cases.
295 | // px_barycentric_coordinates: A 3D tensor with
296 | //   shape {image_height, image_width, 3}. At return, contains the triplet of
297 | //   barycentric coordinates at each pixel in the same vertex ordering as
298 | //   triangles. If no triangle is present, all coordinates are 0.
299 | // z_buffer: A 2D tensor with shape {image_height, image_width}. At
300 | //   return, contains the normalized device Z coordinates of the rendered
301 | //   triangles.
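// The strategy here: rasterize each triangle independently. For each triangle
// we build the unnormalized inverse of its clip-space vertex matrix once,
// scan only the pixels in its screen-space bounding box, and keep the nearest
// fragment per pixel via the z-buffer.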
302 | std::vector<torch::Tensor> rasterize_triangles_forward(
303 |     const torch::Tensor &vertices,
304 |     const torch::Tensor &triangles,
305 |     int image_width,
306 |     int image_height
307 | ) {
308 |   const int triangle_count = (int) triangles.size(0);
309 |   const float half_image_width = 0.5 * image_width;
310 |   const float half_image_height = 0.5 * image_height;
311 |   float unnormalized_matrix_inverse[9];
312 |   float b_over_w[3];
313 |   auto px_triangle_ids = torch::zeros(
314 |       {image_height, image_width},
315 |       torch::dtype(torch::kInt32));
316 |   auto px_barycentric_coords = torch::zeros(
317 |       {image_height, image_width, 3},
318 |       torch::dtype(torch::kFloat32).requires_grad(true));
319 |   auto z_buffer = torch::ones(
320 |       {image_height, image_width},
321 |       torch::dtype(torch::kFloat32));
322 |
323 |   auto vertices_a = vertices.accessor<float, 2>();
324 |   auto triangles_a = triangles.accessor<int, 2>();
325 |   auto z_buffer_a = z_buffer.accessor<float, 2>();
326 |   auto px_triangle_ids_a = px_triangle_ids.accessor<int, 2>();
327 |   auto px_barycentric_coords_a =
328 |       px_barycentric_coords.accessor<float, 3>();
329 |
330 |   for (int triangle_id = 0; triangle_id < triangle_count; ++triangle_id) {
331 |     const int v0_id = triangles_a[triangle_id][0];
332 |     const int v1_id = triangles_a[triangle_id][1];
333 |     const int v2_id = triangles_a[triangle_id][2];
334 |
335 |     const float v0w = vertices_a[v0_id][3];
336 |     const float v1w = vertices_a[v1_id][3];
337 |     const float v2w = vertices_a[v2_id][3];
338 |     // Early exit: if all w < 0, triangle is entirely behind the eye.
339 |     if (v0w < 0 && v1w < 0 && v2w < 0) {
340 |       continue;
341 |     }
342 |
343 |     const float v0x = vertices_a[v0_id][0];
344 |     const float v0y = vertices_a[v0_id][1];
345 |     const float v1x = vertices_a[v1_id][0];
346 |     const float v1y = vertices_a[v1_id][1];
347 |     const float v2x = vertices_a[v2_id][0];
348 |     const float v2y = vertices_a[v2_id][1];
349 |
350 |     compute_unnormalized_matrix_inverse(v0x, v1x, v2x,
351 |                                         v0y, v1y, v2y,
352 |                                         v0w, v1w, v2w,
353 |                                         unnormalized_matrix_inverse);
354 |
355 |     // Initialize the bounding box to the entire screen.
356 |     int left = 0, right = image_width, bottom = 0, top = image_height;
357 |     // If the triangle is entirely in front of the eye (all w > 0), project the
358 |     // vertices to pixel coordinates and find the triangle bounding box,
359 |     // enlarged to the nearest integer and clamped to the image boundaries.
360 |     if (v0w > 0 && v1w > 0 && v2w > 0) {
361 |       const float p0x = (v0x / v0w + 1.0) * half_image_width;
362 |       const float p1x = (v1x / v1w + 1.0) * half_image_width;
363 |       const float p2x = (v2x / v2w + 1.0) * half_image_width;
364 |       const float p0y = (v0y / v0w + 1.0) * half_image_height;
365 |       const float p1y = (v1y / v1w + 1.0) * half_image_height;
366 |       const float p2y = (v2y / v2w + 1.0) * half_image_height;
367 |       left = clamped_integer_min(p0x, p1x, p2x, 0, image_width);
368 |       right = clamped_integer_max(p0x, p1x, p2x, 0, image_width);
369 |       bottom = clamped_integer_min(p0y, p1y, p2y, 0, image_height);
370 |       top = clamped_integer_max(p0y, p1y, p2y, 0, image_height);
371 |     }
372 |
373 |     // Iterate over each pixel in the bounding box.
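    // Pixel centers sit at half-integer pixel coordinates; each is mapped back
    // to [-1, 1] NDC before the edge functions are evaluated.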
374 | for (int iy = bottom; iy < top; ++iy) { 375 | for (int ix = left; ix < right; ++ix) { 376 | const float px = ((ix + 0.5) / half_image_width) - 1.0; 377 | const float py = ((iy + 0.5) / half_image_height) - 1.0; 378 | 379 | compute_edge_functions(px, py, unnormalized_matrix_inverse, b_over_w); 380 | if (!pixel_is_inside_triangle(b_over_w)) { 381 | continue; 382 | } 383 | 384 | const float one_over_w = b_over_w[0] + b_over_w[1] + b_over_w[2]; 385 | const float b0 = b_over_w[0] / one_over_w; 386 | const float b1 = b_over_w[1] / one_over_w; 387 | const float b2 = b_over_w[2] / one_over_w; 388 | 389 | const float v0z = vertices_a[v0_id][2]; 390 | const float v1z = vertices_a[v1_id][2]; 391 | const float v2z = vertices_a[v2_id][2]; 392 | // Since we computed an unnormalized w above, we need to recompute 393 | // a properly scaled clip-space w value and then divide clip-space z 394 | // by that. 395 | const float clip_z = b0 * v0z + b1 * v1z + b2 * v2z; 396 | const float clip_w = b0 * v0w + b1 * v1w + b2 * v2w; 397 | const float z = clip_z / clip_w; 398 | 399 | // Skip the pixel if it is farther than the current z-buffer pixel or 400 | // beyond the near or far clipping plane. 401 | if (z < -1.0 || z > 1.0 || z > z_buffer_a[iy][ix]) { 402 | continue; 403 | } 404 | 405 | px_triangle_ids_a[iy][ix] = triangle_id; 406 | z_buffer_a[iy][ix] = z; 407 | px_barycentric_coords_a[iy][ix][0] = b0; 408 | px_barycentric_coords_a[iy][ix][1] = b1; 409 | px_barycentric_coords_a[iy][ix][2] = b2; 410 | } 411 | } 412 | } 413 | 414 | return { 415 | px_triangle_ids, 416 | px_barycentric_coords, 417 | z_buffer 418 | }; 419 | } 420 | 421 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 422 | m.def("forward", &rasterize_triangles_forward, "Rasterize forward"); 423 | m.def("backward", &rasterize_triangles_backward, "Rasterize backward"); 424 | } 425 | -------------------------------------------------------------------------------- /src/mesh_renderer/kernels/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils import cpp_extension 3 | 4 | setup(name="rasterize_triangles_cpp", 5 | ext_modules=[ 6 | cpp_extension.CppExtension( 7 | "rasterize_triangles_cpp", ["rasterize_triangles.cpp"]), 8 | ], 9 | cmdclass={"build_ext": cpp_extension.BuildExtension}) 10 | -------------------------------------------------------------------------------- /src/mesh_renderer/mesh_renderer_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | 4 | import numpy as np 5 | import torch 6 | 7 | from ..common import camera_utils 8 | from .. import mesh_renderer 9 | from . 
import test_utils 10 | 11 | class RenderTest(unittest.TestCase): 12 | def setUp(self): 13 | self.test_data_directory = os.path.join( 14 | os.path.dirname(os.path.abspath(__file__)), 15 | 'test_data' 16 | ) 17 | 18 | # Set up a basic cube centered at the origin, with vertex normals pointing 19 | # outwards along the line from the origin to the cube vertices: 20 | self.cube_vertices = torch.tensor( 21 | [[-1, -1, 1], [-1, -1, -1], [-1, 1, -1], [-1, 1, 1], [1, -1, 1], 22 | [1, -1, -1], [1, 1, -1], [1, 1, 1]], 23 | dtype=torch.float32) 24 | self.cube_normals = torch.nn.functional.normalize(self.cube_vertices, dim=1, p=2) 25 | self.cube_triangles = torch.tensor( 26 | [[0, 1, 2], [2, 3, 0], [3, 2, 6], [6, 7, 3], [7, 6, 5], [5, 4, 7], 27 | [4, 5, 1], [1, 0, 4], [5, 6, 2], [2, 1, 5], [7, 4, 0], [0, 3, 7]], 28 | dtype=torch.int32) 29 | 30 | def testRendersSimpleCube(self): 31 | """Renders a simple cube to test the full forward pass. 32 | 33 | Verifies the functionality of both the custom kernel and the python wrapper. 34 | """ 35 | 36 | model_transforms = camera_utils.euler_matrices( 37 | torch.tensor([[-20.0, 0.0, 60.0], [45.0, 60.0, 0.0]]))[:, :3, :3] 38 | 39 | vertices_world_space = torch.matmul( 40 | torch.stack([self.cube_vertices, self.cube_vertices]), 41 | model_transforms.transpose(1, 2) 42 | ) 43 | 44 | normals_world_space = torch.matmul( 45 | torch.stack([self.cube_normals, self.cube_normals]), 46 | model_transforms.transpose(1, 2) 47 | ) 48 | 49 | # camera position: 50 | eye = torch.tensor(2 * [[0.0, 0.0, 6.0]], dtype=torch.float32) 51 | center = torch.tensor(2 * [[0.0, 0.0, 0.0]], dtype=torch.float32) 52 | world_up = torch.tensor(2 * [[0.0, 1.0, 0.0]], dtype=torch.float32) 53 | image_width = 640 54 | image_height = 480 55 | light_positions = torch.tensor([[[0.0, 0.0, 6.0]], [[0.0, 0.0, 6.0]]]) 56 | light_intensities = torch.ones([2, 1, 3], dtype=torch.float32) 57 | vertex_diffuse_colors = torch.ones_like(vertices_world_space, dtype=torch.float32) 58 | 59 | images = mesh_renderer.render( 60 | vertices_world_space, self.cube_triangles, normals_world_space, 61 | vertex_diffuse_colors, eye, center, world_up, light_positions, 62 | light_intensities, image_width, image_height 63 | ) 64 | 65 | for image_id in range(images.shape[0]): 66 | target_image_name = 'Gray_Cube_%i.png' % image_id 67 | baseline_image_path = os.path.join(self.test_data_directory, 68 | target_image_name) 69 | test_utils.expect_image_file_and_render_are_near( 70 | self, baseline_image_path, images[image_id, :, :, :]) 71 | 72 | def testComplexShading(self): 73 | """Tests specular highlights, colors, and multiple lights per image.""" 74 | # rotate the cube for the test: 75 | model_transforms = camera_utils.euler_matrices( 76 | torch.tensor([[-20.0, 0.0, 60.0], [45.0, 60.0, 0.0]]))[:, :3, :3] 77 | 78 | vertices_world_space = torch.matmul( 79 | torch.stack([self.cube_vertices, self.cube_vertices]), 80 | model_transforms.transpose(1, 2) 81 | ) 82 | 83 | normals_world_space = torch.matmul( 84 | torch.stack([self.cube_normals, self.cube_normals]), 85 | model_transforms.transpose(1, 2) 86 | ) 87 | 88 | # camera position: 89 | eye = torch.tensor([[0.0, 0.0, 6.0], [0., 0.2, 18.0]], dtype=torch.float32) 90 | center = torch.tensor([[0.0, 0.0, 0.0], [0.1, -0.1, 0.1]], dtype=torch.float32) 91 | world_up = torch.tensor( 92 | [[0.0, 1.0, 0.0], [0.1, 1.0, 0.15]], dtype=torch.float32) 93 | fov_y = torch.tensor([40., 13.3], dtype=torch.float32) 94 | near_clip = torch.tensor(0.1, dtype=torch.float32) 95 | far_clip = torch.tensor(25.0, 
dtype=torch.float32)
96 |         image_width = 640
97 |         image_height = 480
98 |         light_positions = torch.tensor([[[0.0, 0.0, 6.0], [1.0, 2.0, 6.0]],
99 |                                         [[0.0, -2.0, 4.0], [1.0, 3.0, 4.0]]])
100 |         light_intensities = torch.tensor(
101 |             [[[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]], [[2.0, 0.0, 1.0], [0.0, 2.0,
102 |               1.0]]],
103 |             dtype=torch.float32)
104 |         vertex_diffuse_colors = torch.tensor(2*[[[1.0, 0.0, 0.0],
105 |                                                  [0.0, 1.0, 0.0],
106 |                                                  [0.0, 0.0, 1.0],
107 |                                                  [1.0, 1.0, 1.0],
108 |                                                  [1.0, 1.0, 0.0],
109 |                                                  [1.0, 0.0, 1.0],
110 |                                                  [0.0, 1.0, 1.0],
111 |                                                  [0.5, 0.5, 0.5]]],
112 |                                               dtype=torch.float32)
113 |         vertex_specular_colors = torch.tensor(2*[[[0.0, 1.0, 0.0],
114 |                                                   [0.0, 0.0, 1.0],
115 |                                                   [1.0, 1.0, 1.0],
116 |                                                   [1.0, 1.0, 0.0],
117 |                                                   [1.0, 0.0, 1.0],
118 |                                                   [0.0, 1.0, 1.0],
119 |                                                   [0.5, 0.5, 0.5],
120 |                                                   [1.0, 0.0, 0.0]]],
121 |                                                dtype=torch.float32)
122 |         shininess_coefficients = 6.0 * torch.ones([2, 8], dtype=torch.float32)
123 |         ambient_color = torch.tensor(
124 |             [[0., 0., 0.], [0.1, 0.1, 0.2]], dtype=torch.float32)
125 |         renders = mesh_renderer.render(
126 |             vertices_world_space, self.cube_triangles, normals_world_space,
127 |             vertex_diffuse_colors, eye, center, world_up, light_positions,
128 |             light_intensities, image_width, image_height, vertex_specular_colors,
129 |             shininess_coefficients, ambient_color, fov_y, near_clip, far_clip)
130 |         tonemapped_renders = torch.cat(
131 |             [
132 |                 mesh_renderer.tone_mapper(renders[:, :, :, 0:3], 0.7),
133 |                 renders[:, :, :, 3:4]
134 |             ],
135 |             dim=3)
136 |
137 |         # Check that shininess coefficient broadcasting works by also rendering
138 |         # with a scalar shininess coefficient, and ensuring the result is identical:
139 |         broadcasted_renders = mesh_renderer.render(
140 |             vertices_world_space, self.cube_triangles, normals_world_space,
141 |             vertex_diffuse_colors, eye, center, world_up, light_positions,
142 |             light_intensities, image_width, image_height, vertex_specular_colors,
143 |             6.0, ambient_color, fov_y, near_clip, far_clip)
144 |         tonemapped_broadcasted_renders = torch.cat(
145 |             [
146 |                 mesh_renderer.tone_mapper(broadcasted_renders[:, :, :, 0:3], 0.7),
147 |                 broadcasted_renders[:, :, :, 3:4]
148 |             ],
149 |             dim=3)
        self.assertTrue(torch.allclose(
            tonemapped_renders, tonemapped_broadcasted_renders))
150 |
151 |     def testFullRenderGradientComputation(self):
152 |         """Verifies the Jacobian matrix for the entire renderer.
153 |
154 |         This ensures correct gradients are propagated backwards through the entire
155 |         process, not just through the rasterization kernel. Uses the simple cube
156 |         forward pass.
157 |         """
158 |         image_height = 21
159 |         image_width = 28
160 |
161 |         def render_cube_vertices(cube_vertices):
162 |             # rotate the cube for the test:
163 |             model_transforms = camera_utils.euler_matrices(
164 |                 torch.tensor([[-20.0, 0.0, 60.0], [45.0, 60.0, 0.0]]))[:, :3, :3]
165 |
166 |             vertices_world_space = torch.matmul(
167 |                 torch.stack([cube_vertices, cube_vertices]),
168 |                 model_transforms.transpose(1, 2))
169 |
170 |             normals_world_space = torch.matmul(
171 |                 torch.stack([self.cube_normals, self.cube_normals]),
172 |                 model_transforms.transpose(1, 2))
173 |
174 |             # camera position:
175 |             eye = torch.tensor([0.0, 0.0, 6.0], dtype=torch.float32)
176 |             center = torch.tensor([0.0, 0.0, 0.0], dtype=torch.float32)
177 |             world_up = torch.tensor([0.0, 1.0, 0.0], dtype=torch.float32)
178 |
179 |             # Scene has a single light from the viewer's eye.
180 | light_positions = torch.unsqueeze(torch.stack([eye, eye], dim=0), dim=1) 181 | light_intensities = torch.ones([2, 1, 3], dtype=torch.float32) 182 | 183 | vertex_diffuse_colors = torch.ones_like(vertices_world_space, dtype=torch.float32) 184 | 185 | rendered = mesh_renderer.render( 186 | vertices_world_space, self.cube_triangles, normals_world_space, 187 | vertex_diffuse_colors, eye, center, world_up, light_positions, 188 | light_intensities, image_width, image_height) 189 | return rendered 190 | 191 | test_cube_vertices = torch.tensor(self.cube_vertices, requires_grad=True) 192 | analytical = test_utils.get_analytical_jacobian( 193 | test_cube_vertices, render_cube_vertices(test_cube_vertices)) 194 | numerical = test_utils.get_numerical_jacobian( 195 | render_cube_vertices, test_cube_vertices, eps=1e-3) 196 | jacobians_match = ( 197 | test_utils.check_jacobians_are_nearly_equal( 198 | analytical, numerical, 0.01, 0.01)) 199 | self.assertTrue( 200 | jacobians_match, 201 | "Analytical and numerical jacobians have too many relative or " 202 | "absolute outliers") 203 | 204 | def testThatCubeRotates(self): 205 | """Optimize a simple cube's rotation using pixel loss. 206 | 207 | The rotation is represented as static-basis euler angles. This test checks 208 | that the computed gradients are useful. 209 | """ 210 | image_height = 480 211 | image_width = 640 212 | initial_euler_angles = [[0.0, 0.0, 0.0]] 213 | euler_angles = torch.tensor(initial_euler_angles, requires_grad=True) 214 | 215 | def render_cube_with_rotation(input_euler_angles): 216 | model_rotation = camera_utils.euler_matrices(input_euler_angles)[0, :3, :3] # [3, 3] 217 | 218 | vertices_world_space = torch.reshape( 219 | torch.matmul(self.cube_vertices, model_rotation.T), 220 | [1, 8, 3]) 221 | 222 | normals_world_space = torch.reshape( 223 | torch.matmul(self.cube_normals, model_rotation.T), 224 | [1, 8, 3]) 225 | 226 | # camera position: 227 | eye = torch.tensor([[0.0, 0.0, 6.0]], dtype=torch.float32) 228 | center = torch.tensor([[0.0, 0.0, 0.0]], dtype=torch.float32) 229 | world_up = torch.tensor([[0.0, 1.0, 0.0]], dtype=torch.float32) 230 | 231 | vertex_diffuse_colors = torch.ones_like(vertices_world_space, dtype=torch.float32) 232 | light_positions = torch.reshape(eye, [1, 1, 3]) 233 | light_intensities = torch.ones([1, 1, 3], dtype=torch.float32) 234 | 235 | render = mesh_renderer.render( 236 | vertices_world_space, self.cube_triangles, normals_world_space, 237 | vertex_diffuse_colors, eye, center, world_up, light_positions, 238 | light_intensities, image_width, image_height) 239 | render = torch.reshape(render, [image_height, image_width, 4]) 240 | return render 241 | 242 | # Pick the desired cube rotation for the test: 243 | target_euler_angles = torch.tensor([[-20.0, 0.0, 60.0]]) 244 | desired_render = render_cube_with_rotation(target_euler_angles) 245 | 246 | optimizer = torch.optim.SGD([euler_angles], 0.7, 0.1) 247 | def stepfn(): 248 | optimizer.zero_grad() 249 | render = render_cube_with_rotation(euler_angles) 250 | loss = torch.mean(torch.abs(render - desired_render)) 251 | loss.backward() 252 | torch.nn.utils.clip_grad_norm_([euler_angles], 1.0) 253 | return loss 254 | 255 | for _ in range(35): 256 | optimizer.step(stepfn) 257 | 258 | final_render = render_cube_with_rotation(euler_angles) 259 | desired_render = render_cube_with_rotation(target_euler_angles) # sanity check re-rendering target angles is the same 260 | 261 | target_image_name = 'Gray_Cube_0.png' 262 | baseline_image_path = 
os.path.join(self.test_data_directory, 263 | target_image_name) 264 | test_utils.expect_image_file_and_render_are_near( 265 | self, baseline_image_path, desired_render) 266 | test_utils.expect_image_file_and_render_are_near( 267 | self, 268 | baseline_image_path, 269 | final_render.detach(), 270 | max_outlier_fraction=0.01, 271 | pixel_error_threshold=0.04) 272 | 273 | 274 | if __name__ == "__main__": 275 | unittest.main() -------------------------------------------------------------------------------- /src/mesh_renderer/rasterize.py: -------------------------------------------------------------------------------- 1 | """ 2 | Differentiable triangle rasterizer using Genova 2018 un-clipped 3 | barycentric formulation. 4 | """ 5 | 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import torch 11 | 12 | from ..common import camera_utils 13 | 14 | USE_CPP_RASTERIZER = False 15 | def rasterize_barycentric(clip_space_vertices, triangles, image_width, image_height): 16 | if USE_CPP_RASTERIZER: 17 | from . import rasterize_triangles_ext 18 | return rasterize_triangles_ext.BarycentricRasterizer.apply( 19 | clip_space_vertices, triangles, image_width, image_height 20 | ) 21 | else: 22 | from . import rasterize_triangles_python 23 | return rasterize_triangles_python.rasterize_barycentric( 24 | clip_space_vertices, triangles, image_width, image_height 25 | ) 26 | 27 | def rasterize(world_space_vertices, attributes, triangles, 28 | camera_matrices, image_width, image_height, background_value): 29 | """Rasterize a mesh and compute interpolated vertex attributes. 30 | 31 | Applies projection matrices and then calls rasterize_clip_space(). 32 | 33 | Args: 34 | world_space_vertices: 3D float32 tensor of xyz positions with shape 35 | [batch_size, vertex_count, 3]. 36 | attributes: 3D float32 tensor with shape [batch_size, vertex_count, 37 | attribute_count]. Each vertex attribute is interpolated across 38 | the triangle using barycentric interpolation. 39 | triangles: 2D int32 tensor with shape [triangle_count, 3]. Each triplet 40 | should contain vertex indices describing a triangle such that the 41 | triangle's normal points toward the viewer if the forward order of 42 | the triplet defines a clockwise winding of the vertices. Gradients 43 | with respect to this tensor are not available. 44 | # TODO: make this use CCW ordering to be consistent with soft_mesh_renderer 45 | camera_matrices: 3D float tensor with shape [batch_size, 4, 4] containing 46 | model-view-perspective projection matrices. 47 | image_width: int specifying desired output image width in pixels. 48 | image_height: int specifying desired output image height in pixels. 49 | background_value: a 1D float32 tensor with shape [attribute_count]. 50 | Pixels that lie outside all triangles take this value. 51 | 52 | Returns: 53 | A 4D float32 tensor with shape [batch_size, image_height, image_width, 54 | attribute_count], containing the interpolated vertex attributes at each 55 | pixel. 56 | 57 | Raises: 58 | ValueError: An invalid argument to the method is detected. 
59 | """ 60 | clip_space_vertices = camera_utils.transform_homogeneous( 61 | camera_matrices, world_space_vertices) 62 | return rasterize_clip_space(clip_space_vertices, attributes, triangles, 63 | image_width, image_height, background_value) 64 | 65 | 66 | def rasterize_clip_space(clip_space_vertices, attributes, triangles, 67 | image_width, image_height, background_value): 68 | """Rasterize the input mesh expressed in clip-space (xyzw) coordinates. 69 | 70 | Interpolates vertex attributes using perspective-correct interpolation 71 | and clips triangles that lie outside the viewing frustum. 72 | 73 | Args: 74 | clip_space_vertices: 3D float32 tensor of homogeneous vertices (xyzw) 75 | with shape [batch_size, vertex_count, 4]. 76 | attributes: 3D float32 tensor with shape [batch_size, vertex_count, 77 | attribute_count]. Each vertex attribute is interpolated across the 78 | triangle using barycentric interpolation. 79 | triangles: 2D int32 tensor with shape [triangle_count, 3]. Each triplet 80 | should contain vertex indices describing a triangle such that the 81 | triangle's normal points toward the viewer if the forward order of 82 | the triplet defines a clockwise winding of the vertices. Gradients 83 | with respect to this tensor are not available. 84 | # TODO: make this use CCW ordering to be consistent with soft_mesh_renderer 85 | image_width: int specifying desired output image width in pixels. 86 | image_height: int specifying desired output image height in pixels. 87 | background_value: a 1D float32 tensor with shape [attribute_count]. 88 | Pixels that lie outside all triangles take this value. 89 | 90 | Returns: 91 | A 4D float32 tensor with shape [batch_size, image_height, image_width, 92 | attribute_count], containing the interpolated vertex attributes at each 93 | pixel. 94 | 95 | Raises: 96 | ValueError: An invalid argument to the method is detected. 97 | """ 98 | if not image_width > 0: 99 | raise ValueError("Image width must be > 0.") 100 | if not image_height > 0: 101 | raise ValueError("Image height must be > 0.") 102 | if len(clip_space_vertices.shape) != 3: 103 | raise ValueError("The vertex buffer must be 3D.") 104 | 105 | vertex_count = clip_space_vertices.shape[1] 106 | 107 | batch_size = clip_space_vertices.shape[0] 108 | 109 | per_image_barycentric_coordinates = [] 110 | per_image_vertex_ids = [] 111 | 112 | for b in range(batch_size): 113 | px_triangle_ids, px_barycentric_coords, _ = rasterize_barycentric( 114 | clip_space_vertices[b, :, :], triangles, image_width, image_height) 115 | per_image_barycentric_coordinates.append( 116 | torch.reshape(px_barycentric_coords, [-1, 3])) # [pixel_count, 3] 117 | 118 | vertex_ids = torch.index_select( 119 | triangles, 0, torch.reshape(px_triangle_ids, [-1]).long()) # [pixel_count, 3] 120 | reindexed_ids = vertex_ids + b * clip_space_vertices.shape[1] 121 | per_image_vertex_ids.append(reindexed_ids) 122 | 123 | barycentric_coordinates = torch.reshape( 124 | torch.stack(per_image_barycentric_coordinates, 0), [-1, 3]) 125 | vertex_ids = torch.reshape( 126 | torch.stack(per_image_vertex_ids, 0), [-1, 3]) 127 | 128 | # Indexes with each pixel's clip-space triangle's extrema (the pixel's 129 | # 'corner points') ids to get the relevant properties for deferred shading. 
130 | flattened_vertex_attributes = torch.reshape(attributes, 131 | [batch_size * vertex_count, -1]) 132 | corner_attributes = flattened_vertex_attributes[vertex_ids.long()] 133 | 134 | # Computes the pixel attributes by interpolating the known attributes at 135 | # the corner points of the triangle interpolated with the 136 | # barycentric coordinates. 137 | weighted_vertex_attributes = torch.mul(corner_attributes, 138 | torch.unsqueeze(barycentric_coordinates, 2)) 139 | summed_attributes = torch.sum(weighted_vertex_attributes, dim=1) 140 | attribute_images = torch.reshape(summed_attributes, 141 | [batch_size, image_height, image_width, -1]) 142 | 143 | # Barycentric coordinates should approximately sum to one where there is 144 | # rendered geometry, but be exactly zero where there is not. 145 | alphas = torch.clamp( 146 | torch.sum(2.0 * barycentric_coordinates, dim=1), 0.0, 1.0) 147 | alphas = torch.reshape(alphas, [batch_size, image_height, image_width, 1]) 148 | 149 | attributes_with_background = ( 150 | alphas * attribute_images + (1.0 - alphas) * background_value) 151 | 152 | return attributes_with_background 153 | -------------------------------------------------------------------------------- /src/mesh_renderer/rasterize_triangles_ext.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import rasterize_triangles_cpp 4 | 5 | 6 | class BarycentricRasterizer(torch.autograd.Function): 7 | @staticmethod 8 | def forward(ctx, clip_space_vertices, triangles, image_width, image_height): 9 | """Rasterize the input mesh expressed in clip-space (xyzw) coordinates. 10 | 11 | Interpolates barycentric coordinates using perspective-correct interpolation 12 | and clips triangles that lie outside the viewing frustum. 13 | 14 | Args: 15 | clip_space_vertices: 2D float32 tensor of homogeneous vertices (xyzw) 16 | with shape [vertex_count, 4]. 17 | triangles: 2D int32 tensor with shape [triangle_count, 3]. Each triplet 18 | should contain vertex indices describing a triangle such that the 19 | triangle's normal points toward the viewer if the forward order of 20 | the triplet defines a clockwise winding of the vertices. Gradients 21 | with respect to this tensor are not available. 22 | image_width: int specifying desired output image width in pixels. 23 | image_height: int specifying desired output image height in pixels. 24 | 25 | Returns: 26 | px_triangle_ids: A 2D tensor with shape [image_height, image_width]. 27 | At return, each pixel contains a triangle id in the range 28 | [0, triangle_count). The id value is also 0 if there is no triangle 29 | at the pixel. The px_barycentric_coordinates must be checked to distinguish 30 | between the two cases. 31 | px_barycentric_coordinates: A 3D tensor with 32 | shape [image_height, image_width, 3]. At return, contains the triplet of 33 | barycentric coordinates at each pixel in the same vertex ordering as 34 | triangles. If no triangle is present, all coordinates are 0. 35 | z_buffer: A 2D tensor with shape [image_height, image_width] elements. At 36 | return, contains the normalized device Z coordinates of the rendered 37 | triangles. 
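        Example (illustrative only; it assumes the rasterize_triangles_cpp
        extension has been built as described in the package README):

            px_ids, px_bc, z = BarycentricRasterizer.apply(
                clip_space_vertices, triangles, 640, 480)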
38 |         """
39 |         px_triangle_ids, px_barycentric_coords, z_buffer = rasterize_triangles_cpp.forward(
40 |             clip_space_vertices, triangles, image_width, image_height)
41 |         ctx.save_for_backward(clip_space_vertices, triangles,
42 |                               px_triangle_ids, px_barycentric_coords)
43 |         return px_triangle_ids, px_barycentric_coords, z_buffer
44 | 
45 |     @staticmethod
46 |     def backward(ctx, _, df_dbarycentric_coords, __):
47 |         """Get the gradient of a scalar loss function w.r.t. input vertices
48 |         expressed in clip-space (xyzw) coordinates.
49 |         In the backward pass we receive a Tensor containing the gradient of the
50 |         loss function w.r.t. our barycentric coordinate output and compute
51 |         the gradient of the loss w.r.t. each vertex.
52 | 
53 |         Gradients w.r.t. triangle_ids or image width or height are not available.
54 |         """
55 |         clip_space_vertices, triangles, px_triangle_ids, px_barycentric_coords = ctx.saved_tensors
56 |         output = rasterize_triangles_cpp.backward(
57 |             df_dbarycentric_coords,
58 |             clip_space_vertices,
59 |             triangles,
60 |             px_triangle_ids,
61 |             px_barycentric_coords)
62 |         df_dvertices, = output
63 |         return df_dvertices, torch.zeros_like(triangles), None, None
--------------------------------------------------------------------------------
/src/mesh_renderer/rasterize_triangles_python.py:
--------------------------------------------------------------------------------
1 | """
2 | rasterize_triangles_python.py
3 | 
4 | Implements BarycentricRasterizer with PyTorch-only primitives (no C++ extensions).
5 | """
6 | 
7 | from ..common import camera_utils
8 | import torch
9 | import math
10 | 
11 | # Returns a 4x4 viewport matrix which can be used to convert 3D homogeneous points in clip space to screen space,
12 | # specified by args:
13 | # - x: integer giving the screen space X offset
14 | # - y: integer giving the screen space Y offset
15 | # - w: integer giving the total screen space width
16 | # - h: integer giving the total screen space height
17 | # - z_buffer_res: number giving resolution of the z-buffer
18 | #
19 | # The bi-unit cube [-1, 1]*[-1, 1]*[-1, 1] should be mapped onto the screen cube [x, x+w]*[y, y+h]*[0, z_buffer_res].
20 | def viewport(x, y, w, h, z_buffer_res):
21 |     res = torch.eye(4)
22 |     res[0][0] = w/2.0
23 |     res[1][1] = h/2.0
24 |     res[2][2] = z_buffer_res/2.0
25 |     res[0:3, 3] = torch.tensor([x + w/2.0, y + h/2.0, z_buffer_res / 2.0])
26 |     return res
27 | 
28 | # Returns barycentric coordinates of a 3D point P w.r.t. triangle v0, v1, v2.
29 | # The input `M_inv` should be the inverse of a 3x3 matrix where the columns are the vertices.
30 | def barycentric(M_inv, p):
31 |     return M_inv @ p
32 | 
33 | def rasterize_barycentric(clip_space_vertices, triangles, image_width, image_height):
34 |     """Rasterize the input mesh expressed in clip-space (xyzw) coordinates.
35 | 
36 |     Interpolates barycentric coordinates using perspective-correct interpolation
37 |     and clips triangles that lie outside the viewing frustum.
38 | 
39 |     Args:
40 |         clip_space_vertices: 2D float32 tensor of homogeneous vertices (xyzw)
41 |             with shape [vertex_count, 4].
42 |         triangles: 2D int32 tensor with shape [triangle_count, 3]. Each triplet
43 |             should contain vertex indices describing a triangle such that the
44 |             triangle's normal points toward the viewer if the forward order of
45 |             the triplet defines a clockwise winding of the vertices. Gradients
46 |             with respect to this tensor are not available.
47 |         image_width: int specifying desired output image width in pixels.
48 | image_height: int specifying desired output image height in pixels. 49 | 50 | Returns: 51 | px_triangle_ids: A 2D tensor with shape [image_height, image_width]. 52 | At return, each pixel contains a triangle id in the range 53 | [0, triangle_count). The id value is also 0 if there is no triangle 54 | at the pixel. The px_barycentric_coordinates must be checked to distinguish 55 | between the two cases. 56 | px_barycentric_coordinates: A 3D tensor with 57 | shape [image_height, image_width, 3]. At return, contains the triplet of 58 | barycentric coordinates at each pixel in the same vertex ordering as 59 | triangles. If no triangle is present, all coordinates are 0. 60 | z_buffer: A 2D tensor with shape [image_height, image_width] elements. At 61 | return, contains the normalized device Z coordinates of the rendered 62 | triangles. 63 | """ 64 | z_buffer = torch.ones([image_height, image_width], dtype=torch.float32) 65 | px_triangle_ids = torch.zeros([image_height, image_width], dtype=torch.int32) 66 | px_barycentric_coordinates = torch.zeros([image_height, image_width, 3], dtype=torch.float32) 67 | 68 | # z-buffer ranges from 0.0 to 1.0, anything outside gets clipped 69 | z_buffer_res = 1.0 70 | viewport_mat = viewport(0., 0., image_width, image_height, z_buffer_res) 71 | px_M = torch.zeros(3, 3) 72 | 73 | for triangle_id in range(len(triangles)): 74 | triangle = triangles[triangle_id] 75 | proj_v012 = clip_space_vertices[triangle] # shape: [3, 4] 76 | proj_v012_w = proj_v012[:,[3]] # shape: [3, 1] 77 | 78 | # clip space to screen space 79 | px_v012 = (viewport_mat @ (proj_v012 / (proj_v012_w)).T).T[:,:3] 80 | 81 | # get bbox in screen-space 82 | minx = math.floor( 83 | max(0, min(px_v012[0][0], px_v012[1][0], px_v012[2][0], image_width)) 84 | ) 85 | miny = math.floor( 86 | max(0, min(px_v012[0][1], px_v012[1][1], px_v012[2][1], image_height)) 87 | ) 88 | maxx = math.ceil( 89 | min(image_width, max(px_v012[0][0], px_v012[1][0], px_v012[2][0], 0)) 90 | ) 91 | maxy = math.ceil( 92 | min(image_height, max(px_v012[0][1], px_v012[1][1], px_v012[2][1], 0)) 93 | ) 94 | 95 | px_M[:] = px_v012.T 96 | px_M[2,:] = torch.tensor([1., 1., 1.]) 97 | try: 98 | px_M_inv = px_M.inverse() 99 | except Exception: 100 | # Screen-space vertex basis is not invertible, meaning triangle is 101 | # degenerate when projected (zero area). Skip rendering 102 | continue 103 | 104 | did_draw = 0 105 | # Depths of the screen-space vertices as suitable for z-test. 106 | # Note that depth is inversely proportional to the vertex eye-space z-coordinate. 107 | vertex_depths = px_v012[:,2] 108 | 109 | for y in range(miny, maxy): 110 | if y<0 or y>=image_height: 111 | continue 112 | for x in range(minx, maxx): 113 | if x<0 or x>=image_width: 114 | continue 115 | p = torch.tensor([x + 0.5, y + 0.5, 1.]) 116 | bc_screen = barycentric(px_M_inv, p) 117 | if bc_screen[0] < 0 or bc_screen[1] < 0 or bc_screen[2] < 0: 118 | # pixel is not inside triangle 119 | continue 120 | else: 121 | # get perspective-correct barycentric coordinates 122 | bc = torch.nn.functional.normalize(bc_screen / proj_v012_w.T[0], dim=0, p=1) 123 | # TODO: shouldn't this be perspective-corrected z? bug? 
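                    # Note on the TODO above: interpolating NDC depth with
                    # screen-space barycentrics appears to be the standard
                    # z-buffer behavior, since for a planar triangle NDC z is
                    # an affine function of screen x/y. Using the
                    # perspective-corrected `bc` instead (z = vertex_depths @ bc)
                    # would interpolate depth linearly in eye space, which
                    # gives a different depth distribution across the triangle.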
124 | z = vertex_depths @ bc_screen 125 | if z < 0.0 or z > 1.0 or z > z_buffer[y][x]: 126 | continue 127 | did_draw += 1 128 | z_buffer[y][x] = z 129 | px_triangle_ids[y][x] = triangle_id 130 | px_barycentric_coordinates[y][x] = bc 131 | print("drew {} pixels for triangle {}".format(did_draw, triangle_id)) 132 | 133 | return px_triangle_ids, px_barycentric_coordinates, z_buffer 134 | -------------------------------------------------------------------------------- /src/mesh_renderer/rasterize_triangles_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os 6 | import unittest 7 | 8 | import numpy as np 9 | import torch 10 | 11 | from ..common import camera_utils 12 | from .rasterize import rasterize, rasterize_barycentric 13 | from . import test_utils 14 | 15 | 16 | class RenderTest(unittest.TestCase): 17 | def setUp(self): 18 | self.test_data_directory = os.path.join(os.path.dirname(os.path.realpath(__file__)), "./test_data/") 19 | 20 | self.cube_vertex_positions = torch.tensor( 21 | [[-1, -1, 1], [-1, -1, -1], [-1, 1, -1], [-1, 1, 1], [1, -1, 1], 22 | [1, -1, -1], [1, 1, -1], [1, 1, 1]], 23 | dtype=torch.float32) 24 | self.cube_triangles = torch.tensor( 25 | [[0, 1, 2], [2, 3, 0], [3, 2, 6], [6, 7, 3], [7, 6, 5], [5, 4, 7], 26 | [4, 5, 1], [1, 0, 4], [5, 6, 2], [2, 1, 5], [7, 4, 0], [0, 3, 7]], 27 | dtype=torch.int32) 28 | 29 | self.image_width = 640 30 | self.image_height = 480 31 | 32 | self.perspective = camera_utils.perspective( 33 | self.image_width / self.image_height, 34 | torch.tensor([40.0]), torch.tensor([0.01]), 35 | torch.tensor([10.0])) 36 | 37 | def runTriangleTest(self, w_vector, target_image_name): 38 | """Directly renders a rasterized triangle's barycentric coordinates. 39 | 40 | Tests only the kernel (rasterize_triangles_module). 41 | 42 | Args: 43 | w_vector: 3-vector of w components to scale triangle vertices. 44 | target_image_name: image file name to compare result against. 45 | """ 46 | clip_init = np.array( 47 | [ 48 | [-0.5, -0.5, 0.8, 1.0], 49 | [0.0, 0.5, 0.3, 1.0], 50 | [0.5, -0.5, 0.3, 1.0] 51 | ], dtype=np.float32) 52 | clip_init = clip_init * np.reshape( 53 | np.array(w_vector, dtype=np.float32), [3, 1]) 54 | 55 | clip_coordinates = torch.tensor(clip_init) 56 | triangles = torch.tensor([[0, 1, 2]], dtype=torch.int32) 57 | 58 | _, barycentric_coords, _ = ( 59 | rasterize_barycentric( 60 | clip_coordinates, 61 | triangles, 62 | self.image_width, 63 | self.image_height)) 64 | image = torch.cat( 65 | [barycentric_coords, 66 | torch.ones([self.image_height, self.image_width, 1])], dim=2) 67 | baseline_image_path = os.path.join(self.test_data_directory, 68 | target_image_name) 69 | test_utils.expect_image_file_and_render_are_near( 70 | self, baseline_image_path, image) 71 | 72 | def testRendersSimpleTriangle(self): 73 | self.runTriangleTest((1.0, 1.0, 1.0), "Simple_Triangle.png") 74 | 75 | def testRendersPerspectiveCorrectTriangle(self): 76 | self.runTriangleTest((0.2, 0.5, 2.0), 77 | "Perspective_Corrected_Triangle.png") 78 | 79 | def testRendersTwoCubesInBatch(self): 80 | """Renders a simple cube in two viewpoints to test the python wrapper. 
81 | """ 82 | 83 | vertex_rgb = (self.cube_vertex_positions * 0.5 + 0.5) 84 | vertex_rgba = torch.cat([vertex_rgb, torch.ones([8, 1])], dim=1) 85 | 86 | center = torch.tensor([[0, 0, 0]], dtype=torch.float32) 87 | world_up = torch.tensor([[0, 1, 0]], dtype=torch.float32) 88 | look_at_1 = camera_utils.look_at( 89 | torch.tensor([[2, 3, 6]], dtype=torch.float32), 90 | center, 91 | world_up) 92 | look_at_2 = camera_utils.look_at( 93 | torch.tensor([[-3, 1, 6]], dtype=torch.float32), 94 | center, 95 | world_up) 96 | projection_1 = torch.matmul(self.perspective, look_at_1) 97 | projection_2 = torch.matmul(self.perspective, look_at_2) 98 | projection = torch.cat([projection_1, projection_2], dim=0) 99 | background_value = torch.Tensor([0., 0., 0., 0.]) 100 | 101 | rendered = rasterize( 102 | torch.stack([self.cube_vertex_positions, 103 | self.cube_vertex_positions]), 104 | torch.stack([vertex_rgba, vertex_rgba]), 105 | self.cube_triangles, 106 | projection, 107 | self.image_width, 108 | self.image_height, 109 | background_value) 110 | 111 | for i in (0, 1): 112 | image = rendered[i, :, :, :] 113 | baseline_image_name = "Unlit_Cube_{}.png".format(i) 114 | baseline_image_path = os.path.join(self.test_data_directory, 115 | baseline_image_name) 116 | test_utils.expect_image_file_and_render_are_near( 117 | self, baseline_image_path, image) 118 | 119 | def testSimpleTriangleGradientComputation(self): 120 | """Verify the Jacobian matrix for a single pixel. 121 | 122 | The pixel is in the center of a triangle facing the camera. This makes 123 | it easy to check which entries of the Jacobian might not make sense 124 | without worrying about corner cases. 125 | """ 126 | test_pixel_x = 325 127 | test_pixel_y = 245 128 | 129 | triangles = torch.tensor([[0, 1, 2]], dtype=torch.int32) 130 | 131 | def rasterize_test_pixels(clip_coordinates): 132 | _, barycentric_coords, _ = ( 133 | rasterize_barycentric( 134 | clip_coordinates, 135 | triangles, 136 | self.image_width, 137 | self.image_height)) 138 | 139 | pixels_to_compare = barycentric_coords[ 140 | test_pixel_y: test_pixel_y + 1, test_pixel_x: test_pixel_x + 1, :] 141 | return pixels_to_compare 142 | 143 | test_clip_coordinates = torch.tensor( 144 | [[-0.5, -0.5, 0.8, 1.0], 145 | [0.0, 0.5, 0.3, 1.0], 146 | [0.5, -0.5, 0.3, 1.0]], 147 | dtype=torch.float32, 148 | requires_grad=True) 149 | jacobians_match = torch.autograd.gradcheck( 150 | rasterize_test_pixels, 151 | test_clip_coordinates, 152 | eps=4e-2, 153 | atol=0.1, 154 | rtol=0.01) 155 | self.assertTrue( 156 | jacobians_match, 157 | "Analytical and numerical jacobians have too many relative or " 158 | "absolute outliers") 159 | 160 | def testInternalRenderGradientComputation(self): 161 | """Isolates and verifies the Jacobian matrix for the custom kernel.""" 162 | image_height = 21 163 | image_width = 28 164 | 165 | def get_barycentric_coordinates(clip_coordinates): 166 | _, barycentric_coords, _ = ( 167 | rasterize_barycentric( 168 | clip_coordinates, 169 | self.cube_triangles, 170 | image_width, 171 | image_height)) 172 | return barycentric_coords 173 | 174 | # Precomputed transformation of the simple cube to normalized device 175 | # coordinates, in order to isolate the rasterization gradient. 
176 |         test_clip_coordinates = torch.tensor(
177 |             [[-0.43889722, -0.53184521, 0.85293502, 1.0],
178 |             [-0.37635487, 0.22206162, 0.90555805, 1.0],
179 |             [-0.22849123, 0.76811147, 0.80993629, 1.0],
180 |             [-0.2805393, -0.14092168, 0.71602166, 1.0],
181 |             [0.18631913, -0.62634289, 0.88603103, 1.0],
182 |             [0.16183566, 0.08129397, 0.93020856, 1.0],
183 |             [0.44147962, 0.53497446, 0.85076219, 1.0],
184 |             [0.53008741, -0.31276882, 0.77620775, 1.0]],
185 |             dtype=torch.float32,
186 |             requires_grad=True)
187 |         raster_out = get_barycentric_coordinates(test_clip_coordinates)
188 |         analytical = test_utils.get_analytical_jacobian(
189 |             test_clip_coordinates, raster_out)
190 |         numerical = test_utils.get_numerical_jacobian(
191 |             get_barycentric_coordinates, test_clip_coordinates, eps=4e-2)
192 | 
193 |         jacobians_match, message = (
194 |             test_utils.check_jacobians_are_nearly_equal(
195 |                 analytical, numerical, 0.01, 0.01))
196 |         self.assertTrue(
197 |             jacobians_match,
198 |             "Analytical and numerical jacobians have too many relative or "
199 |             "absolute outliers: " + message)
200 | 
201 | 
202 | if __name__ == "__main__":
203 |     unittest.main()
204 | 
--------------------------------------------------------------------------------
/src/mesh_renderer/render.py:
--------------------------------------------------------------------------------
1 | """
2 | Differentiable 3D rendering of a triangle mesh based on the
3 | sampled un-clipped barycentric approach from Genova 2018.
4 | """
5 | 
6 | from __future__ import absolute_import
7 | from __future__ import division
8 | from __future__ import print_function
9 | 
10 | import torch
11 | 
12 | from ..common import camera_utils
13 | from .rasterize import rasterize
14 | 
15 | 
16 | def render(
17 |         vertices,
18 |         triangles,
19 |         normals,
20 |         diffuse_colors,
21 |         camera_position,
22 |         camera_lookat,
23 |         camera_up,
24 |         light_positions,
25 |         light_intensities,
26 |         image_width,
27 |         image_height,
28 |         specular_colors=None,
29 |         shininess_coefficients=None,
30 |         ambient_color=None,
31 |         fov_y=40.0,
32 |         near_clip=0.01,
33 |         far_clip=10.0):
34 |     """Renders an input scene using phong shading, and returns an output image.
35 | 
36 |     Args:
37 |         vertices: 3D float32 tensor with shape [batch_size, vertex_count, 3]. Each
38 |             triplet is an xyz position in world space.
39 |         triangles: 2D int32 tensor with shape [triangle_count, 3]. Each triplet
40 |             should contain vertex indices describing a triangle such that the
41 |             triangle's normal points toward the viewer if the forward order of the
42 |             triplet defines a clockwise winding of the vertices. Gradients with
43 |             respect to this tensor are not available.
44 |         # TODO: make this use CCW ordering to be consistent with soft_mesh_renderer
45 |         normals: 3D float32 tensor with shape [batch_size, vertex_count, 3]. Each
46 |             triplet is the xyz vertex normal for its corresponding vertex. Each
47 |             vector is assumed to be already normalized.
48 |         diffuse_colors: 3D float32 tensor with shape [batch_size,
49 |             vertex_count, 3]. The RGB diffuse reflection in the range [0, 1] for
50 |             each vertex.
51 |         camera_position: 2D tensor with shape [batch_size, 3] or 1D tensor with
52 |             shape [3] specifying the XYZ world space camera position.
53 |         camera_lookat: 2D tensor with shape [batch_size, 3] or 1D tensor with
54 |             shape [3] containing an XYZ point along the center of the camera's gaze.
55 |         camera_up: 2D tensor with shape [batch_size, 3] or 1D tensor with shape
56 |             [3] containing the up direction for the camera.
The camera will have 57 | no tilt with respect to this direction. 58 | light_positions: a 3D tensor with shape [batch_size, light_count, 3]. The 59 | XYZ position of each light in the scene. In the same coordinate space as 60 | pixel_positions. 61 | light_intensities: a 3D tensor with shape [batch_size, light_count, 3]. 62 | The RGB intensity values for each light. Intensities may be above 1. 63 | image_width: int specifying desired output image width in pixels. 64 | image_height: int specifying desired output image height in pixels. 65 | specular_colors: 3D float32 tensor with shape [batch_size, 66 | vertex_count, 3]. The RGB specular reflection in the range [0, 1] for 67 | each vertex. If supplied, specular reflections will be computed, and 68 | both specular colors and shininess_coefficients are expected. 69 | shininess_coefficients: a 0D-2D float32 tensor with maximum shape 70 | [batch_size, vertex_count]. The phong shininess coefficient of each 71 | vertex. A 0D tensor or float gives a constant shininess coefficient of 72 | all vertices across all batches and images. A 1D tensor must have shape 73 | [batch_size], and a single shininess coefficient per image is used. 74 | ambient_color: a 2D tensor with shape [batch_size, 3]. The RGB ambient 75 | color, which is added to each pixel in the scene. If None, it is 76 | assumed to be black. 77 | fov_y: float, 0D tensor, or 1D tensor with shape [batch_size] specifying 78 | desired output image y field of view in degrees. 79 | near_clip: float, 0D tensor, or 1D tensor with shape [batch_size] 80 | specifying near clipping plane distance. 81 | far_clip: float, 0D tensor, or 1D tensor with shape [batch_size] 82 | specifying far clipping plane distance. 83 | 84 | Returns: 85 | A 4D float32 tensor of shape [batch_size, image_height, image_width, 4] 86 | containing the lit RGBA color values for each image at each pixel. RGB 87 | colors are the intensity values before tonemapping and can be in the range 88 | [0, infinity]. Clipping to the range [0, 1] with np.clip is likely 89 | reasonable for both viewing and training most scenes. More complex scenes 90 | with multiple lights should tone map color values for display only. One 91 | simple tonemapping approach is to rescale color values as x/(1+x); gamma 92 | compression is another common technique. Alpha values are zero for 93 | background pixels and near one for mesh pixels. 94 | Raises: 95 | ValueError: An invalid argument to the method is detected. 
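    Example (a minimal sketch, not part of the original docs; shapes follow
    the argument descriptions above):

        vertices = torch.tensor([[[-0.5, -0.5, 0.0], [0.5, -0.5, 0.0],
                                  [0.0, 0.5, 0.0]]])          # [1, 3, 3]
        normals = torch.tensor([[[0.0, 0.0, 1.0]] * 3])       # [1, 3, 3]
        diffuse = torch.full([1, 3, 3], 0.8)
        triangles = torch.tensor([[0, 1, 2]], dtype=torch.int32)
        rgba = render(
            vertices, triangles, normals, diffuse,
            camera_position=torch.tensor([0.0, 0.0, 6.0]),
            camera_lookat=torch.zeros(3),
            camera_up=torch.tensor([0.0, 1.0, 0.0]),
            light_positions=torch.tensor([[[2.0, 2.0, 6.0]]]),
            light_intensities=torch.ones([1, 1, 3]),
            image_width=640,
            image_height=480)                                 # [1, 480, 640, 4]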
96 | """ 97 | if len(vertices.shape) != 3 or vertices.shape[-1] != 3: 98 | raise ValueError( 99 | "Vertices must have shape [batch_size, vertex_count, 3].") 100 | batch_size = vertices.shape[0] 101 | if len(normals.shape) != 3 or normals.shape[-1] != 3: 102 | raise ValueError( 103 | "Normals must have shape [batch_size, vertex_count, 3].") 104 | if len(light_positions.shape) != 3 or light_positions.shape[-1] != 3: 105 | raise ValueError( 106 | "light_positions must have shape [batch_size, light_count, 3].") 107 | if len(light_intensities.shape) != 3 or light_intensities.shape[-1] != 3: 108 | raise ValueError( 109 | "light_intensities must have shape [batch_size, light_count, 3].") 110 | if len(diffuse_colors.shape) != 3 or diffuse_colors.shape[-1] != 3: 111 | raise ValueError( 112 | "diffuse_colors must have shape [batch_size, vertex_count, 3].") 113 | if (ambient_color is not None and 114 | list(ambient_color.shape) != [batch_size, 3]): 115 | raise ValueError("ambient_color must have shape [batch_size, 3].") 116 | if list(camera_position.shape) == [3]: 117 | camera_position = torch.unsqueeze(camera_position, 0).repeat(batch_size, 1) 118 | elif list(camera_position.shape) != [batch_size, 3]: 119 | raise ValueError( 120 | "camera_position must have shape [batch_size, 3] or [3].") 121 | if list(camera_lookat.shape) == [3]: 122 | camera_lookat = torch.unsqueeze(camera_lookat, 0).repeat(batch_size, 1) 123 | elif list(camera_lookat.shape) != [batch_size, 3]: 124 | raise ValueError( 125 | "camera_lookat must have shape [batch_size, 3] or [3].") 126 | if list(camera_up.shape) == [3]: 127 | camera_up = torch.unsqueeze(camera_up, 0).repeat(batch_size, 1) 128 | elif list(camera_up.shape) != [batch_size, 3]: 129 | raise ValueError("camera_up must have shape [batch_size, 3] or [3].") 130 | if isinstance(fov_y, float): 131 | fov_y = torch.tensor(batch_size * [fov_y], dtype=torch.float32) 132 | elif len(fov_y.shape) == 0: 133 | fov_y = torch.unsqueeze(fov_y, 0).repeat(batch_size) 134 | elif list(fov_y.shape) != [batch_size]: 135 | raise ValueError("fov_y must be a float, a 0D tensor, or a 1D tensor " 136 | "with shape [batch_size].") 137 | if isinstance(near_clip, float): 138 | near_clip = torch.tensor(batch_size * [near_clip], dtype=torch.float32) 139 | elif len(near_clip.shape) == 0: 140 | near_clip = torch.unsqueeze(near_clip, 0).repeat(batch_size) 141 | elif list(near_clip.shape) != [batch_size]: 142 | raise ValueError("near_clip must be a float, a 0D tensor, or a 1D " 143 | "tensor with shape [batch_size].") 144 | if isinstance(far_clip, float): 145 | far_clip = torch.tensor(batch_size * [far_clip], dtype=torch.float32) 146 | elif len(far_clip.shape) == 0: 147 | far_clip = torch.unsqueeze(far_clip, 0).repeat(batch_size) 148 | elif list(far_clip.shape) != [batch_size]: 149 | raise ValueError("far_clip must be a float, a 0D tensor, or a 1D " 150 | "tensor with shape [batch_size].") 151 | if specular_colors is not None and shininess_coefficients is None: 152 | raise ValueError( 153 | "Specular colors were supplied without shininess coefficients.") 154 | if shininess_coefficients is not None and specular_colors is None: 155 | raise ValueError( 156 | "Shininess coefficients were supplied without specular colors.") 157 | if specular_colors is not None: 158 | # Since a 0D float32 tensor is accepted, also accept a float. 
159 | if isinstance(shininess_coefficients, float): 160 | shininess_coefficients = torch.tensor( 161 | shininess_coefficients, dtype=torch.float32) 162 | if len(specular_colors.shape) != 3: 163 | raise ValueError("The specular colors must have shape [batch_size, " 164 | "vertex_count, 3].") 165 | if len(shininess_coefficients.shape) > 2: 166 | raise ValueError("The shininess coefficients must have shape at " 167 | "most [batch_size, vertex_count].") 168 | # If we don't have per-vertex coefficients, we can just reshape the 169 | # input shininess to broadcast later, rather than interpolating an 170 | # additional vertex attribute: 171 | if len(shininess_coefficients.shape) < 2: 172 | vertex_attributes = torch.cat( 173 | [normals, vertices, diffuse_colors, specular_colors], 2) 174 | else: 175 | vertex_attributes = torch.cat( 176 | [ 177 | normals, vertices, diffuse_colors, specular_colors, 178 | torch.unsqueeze(shininess_coefficients, 2) 179 | ], 2) 180 | else: 181 | vertex_attributes = torch.cat([normals, vertices, diffuse_colors], 2) 182 | 183 | camera_matrices = camera_utils.look_at(camera_position, camera_lookat, 184 | camera_up) 185 | 186 | perspective_transforms = camera_utils.perspective( 187 | image_width / image_height, 188 | fov_y, 189 | near_clip, 190 | far_clip) 191 | 192 | clip_space_transforms = torch.matmul(perspective_transforms, camera_matrices) 193 | 194 | pixel_attributes = rasterize( 195 | vertices, vertex_attributes, triangles, 196 | clip_space_transforms, image_width, image_height, 197 | torch.tensor([-1] * vertex_attributes.shape[2])) 198 | 199 | # Extract the interpolated vertex attributes from the pixel buffer and 200 | # supply them to the shader: 201 | pixel_normals = torch.nn.functional.normalize( 202 | pixel_attributes[:, :, :, 0:3], p=2, dim=3) 203 | pixel_positions = pixel_attributes[:, :, :, 3:6] 204 | diffuse_colors = pixel_attributes[:, :, :, 6:9] 205 | if specular_colors is not None: 206 | specular_colors = pixel_attributes[:, :, :, 9:12] 207 | # Retrieve the interpolated shininess coefficients if necessary, or just 208 | # reshape our input for broadcasting: 209 | if len(shininess_coefficients.shape) == 2: 210 | shininess_coefficients = pixel_attributes[:, :, :, 12] 211 | else: 212 | shininess_coefficients = torch.reshape( 213 | shininess_coefficients, [-1, 1, 1]) 214 | 215 | pixel_mask = (diffuse_colors >= 0.0).any(dim=3).type(torch.float32) 216 | 217 | renders = phong_shader( 218 | normals=pixel_normals, 219 | alphas=pixel_mask, 220 | pixel_positions=pixel_positions, 221 | light_positions=light_positions, 222 | light_intensities=light_intensities, 223 | diffuse_colors=diffuse_colors, 224 | camera_position=camera_position if specular_colors is not None else None, 225 | specular_colors=specular_colors, 226 | shininess_coefficients=shininess_coefficients, 227 | ambient_color=ambient_color) 228 | return renders 229 | 230 | 231 | def phong_shader(normals, 232 | alphas, 233 | pixel_positions, 234 | light_positions, 235 | light_intensities, 236 | diffuse_colors=None, 237 | camera_position=None, 238 | specular_colors=None, 239 | shininess_coefficients=None, 240 | ambient_color=None): 241 | """Compute pixelwise lighting from rasterized buffers with the Phong model. 242 | 243 | Args: 244 | normals: a 4D float32 tensor with shape [batch_size, image_height, 245 | image_width, 3]. The inner dimension is the world space XYZ normal 246 | for the corresponding pixel. Should be already normalized. 
247 |         alphas: a 3D float32 tensor with shape [batch_size, image_height,
248 |             image_width]. The inner dimension is the alpha value (transparency)
249 |             for the corresponding pixel.
250 |         pixel_positions: a 4D float32 tensor with shape [batch_size,
251 |             image_height, image_width, 3]. The inner dimension is the world
252 |             space XYZ position for the corresponding pixel.
253 |         light_positions: a 3D tensor with shape [batch_size, light_count, 3].
254 |             The XYZ position of each light in the scene. In the same coordinate
255 |             space as pixel_positions.
256 |         light_intensities: a 3D tensor with shape [batch_size, light_count, 3].
257 |             The RGB intensity values for each light. Intensities may be above 1.
258 |         diffuse_colors: a 4D float32 tensor with shape [batch_size, image_height,
259 |             image_width, 3]. The inner dimension is the diffuse RGB coefficients
260 |             at a pixel in the range [0, 1].
261 |         camera_position: a 2D tensor with shape [batch_size, 3]. The XYZ camera
262 |             position in the scene. If supplied, specular reflections will be
263 |             computed. If not supplied, specular_colors and shininess_coefficients
264 |             are expected to be None. In the same coordinate space as
265 |             pixel_positions.
266 |         specular_colors: a 4D float32 tensor with shape [batch_size,
267 |             image_height, image_width, 3]. The inner dimension is the specular
268 |             RGB coefficients at a pixel in the range [0, 1]. If None, assumed
269 |             to be torch.zeros().
270 |         shininess_coefficients: a 3D float32 tensor that is broadcasted to
271 |             shape [batch_size, image_height, image_width]. The inner dimension
272 |             is the shininess coefficient for the object at a pixel. Dimensions
273 |             that are constant can be given length 1, so [batch_size, 1, 1] and
274 |             [1, 1, 1] are also valid input shapes.
275 |         ambient_color: a 2D tensor with shape [batch_size, 3]. The RGB ambient
276 |             color, which is added to each pixel before tone mapping. If None,
277 |             it is assumed to be torch.zeros().
278 | 
279 |     Returns:
280 |         A 4D float32 tensor of shape [batch_size, image_height, image_width, 4]
281 |         containing the lit RGBA color values for each image at each pixel.
282 |         Colors are in the range [0, 1].
283 | 
284 |     Raises:
285 |         ValueError: An invalid argument to the method is detected.
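    Example (schematic form of the model implemented below, written out for a
    single light with intensity I; k_d/k_s are the diffuse/specular colors,
    n the surface normal, l the direction to the light, r the mirror
    reflection of l about n, v the direction to the camera, and s the
    shininess coefficient):

        color = ambient_color * k_d
                + I * k_d * max(dot(n, l), 0)
                + I * k_s * max(dot(r, v), 0) ** s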
286 | """ 287 | batch_size, image_height, image_width = [s for s in normals.shape[:-1]] 288 | light_count = light_positions.shape[1] 289 | pixel_count = image_height * image_width 290 | # Reshape all values to easily do pixelwise computations: 291 | normals = torch.reshape(normals, [batch_size, -1, 3]) 292 | alphas = torch.reshape(alphas, [batch_size, -1, 1]) 293 | diffuse_colors = torch.reshape(diffuse_colors, [batch_size, -1, 3]) 294 | if camera_position is not None: 295 | specular_colors = torch.reshape(specular_colors, [batch_size, -1, 3]) 296 | 297 | # Ambient component 298 | output_colors = torch.zeros([batch_size, image_height * image_width, 3]) 299 | if ambient_color is not None: 300 | ambient_reshaped = torch.unsqueeze(ambient_color, 1) 301 | output_colors = output_colors + ambient_reshaped * diffuse_colors 302 | 303 | # Diffuse component 304 | pixel_positions = torch.reshape(pixel_positions, [batch_size, -1, 3]) 305 | per_light_pixel_positions = torch.stack( 306 | [pixel_positions] * light_count, 307 | dim=1) # [batch_size, light_count, pixel_count, 3] 308 | directions_to_lights = torch.nn.functional.normalize( 309 | torch.unsqueeze(light_positions, 2) - per_light_pixel_positions, 310 | p=2, 311 | dim=3) # [batch_size, light_count, pixel_count, 3] 312 | # The specular component should only contribute when the light and normal 313 | # face one another (i.e. the dot product is nonnegative): 314 | normals_dot_lights = torch.clamp( 315 | torch.sum( 316 | torch.unsqueeze(normals, 1) * directions_to_lights, dim=3), 317 | 0.0, 1.0) # [batch_size, light_count, pixel_count] 318 | diffuse_output = ( 319 | torch.unsqueeze(diffuse_colors, 1) * 320 | torch.unsqueeze(normals_dot_lights, 3) * 321 | torch.unsqueeze(light_intensities, 2)) 322 | diffuse_output = torch.sum(diffuse_output, dim=1) # [batch_size, pixel_count, 3] 323 | output_colors = output_colors + diffuse_output 324 | 325 | # Specular component 326 | if camera_position is not None: 327 | camera_position = torch.reshape(camera_position, [batch_size, 1, 3]) 328 | mirror_reflection_direction = torch.nn.functional.normalize( 329 | 2.0 * torch.unsqueeze(normals_dot_lights, 3) * torch.unsqueeze( 330 | normals, 1) - directions_to_lights, 331 | p=2, 332 | dim=3) # [batch_size, light_count, pixel_count, 3] 333 | direction_to_camera = torch.nn.functional.normalize( 334 | camera_position - pixel_positions, 335 | p=2, 336 | dim=2) # [batch_size, pixel_count, 3] 337 | reflection_direction_dot_camera_direction = torch.sum( 338 | mirror_reflection_direction * torch.unsqueeze(direction_to_camera, 1), 339 | dim=3) 340 | # The specular component should only contribute when the reflection is 341 | # external: 342 | reflection_direction_dot_camera_direction = torch.clamp( 343 | torch.nn.functional.normalize( 344 | reflection_direction_dot_camera_direction, 345 | p=2, 346 | dim=2), 347 | 0.0, 348 | 1.0) 349 | # The specular component should also only contribute when the diffuse 350 | # component contributes: 351 | reflection_direction_dot_camera_direction = torch.where( 352 | normals_dot_lights != 0.0, 353 | reflection_direction_dot_camera_direction, 354 | torch.zeros_like( 355 | reflection_direction_dot_camera_direction, 356 | dtype=torch.float32)) 357 | # Reshape to support broadcasting the shininess coefficient, which 358 | # rarely varies per-vertex: 359 | reflection_direction_dot_camera_direction = torch.reshape( 360 | reflection_direction_dot_camera_direction, 361 | [batch_size, light_count, image_height, image_width]) 362 | shininess_coefficients 
= torch.unsqueeze(shininess_coefficients, 1) 363 | specularity = torch.reshape( 364 | torch.pow(reflection_direction_dot_camera_direction, 365 | shininess_coefficients), 366 | [batch_size, light_count, pixel_count, 1]) 367 | specular_output = ( 368 | torch.unsqueeze(specular_colors, 1) * specularity * 369 | torch.unsqueeze(light_intensities, 2) 370 | ) 371 | specular_output = torch.sum(specular_output, dim=1) 372 | output_colors = output_colors + specular_output 373 | rgb_images = torch.reshape( 374 | output_colors, 375 | [batch_size, image_height, image_width, 3]) 376 | alpha_images = torch.reshape( 377 | alphas, 378 | [batch_size, image_height, image_width, 1]) 379 | valid_rgb_values = torch.cat(3 * [alpha_images > 0.5], dim=3) 380 | rgb_images = torch.where( 381 | valid_rgb_values, 382 | rgb_images, 383 | torch.zeros_like(rgb_images, dtype=torch.float32)) 384 | return torch.flip( 385 | torch.cat([rgb_images, alpha_images], dim=3), 386 | dims=[1]) 387 | 388 | 389 | def tone_mapper(image, gamma): 390 | """Apply gamma correction to the input image. 391 | 392 | Tone maps the input image batch in order to make scenes with a high dynamic 393 | range viewable. The gamma correction factor is computed separately per 394 | image, but is shared between all provided channels. The exact function 395 | computed is: 396 | 397 | image_out = A*image_in^gamma, where A is an image-wide constant computed 398 | so that the maximum image value is approximately 1. The correction is 399 | applied to all channels. 400 | 401 | Args: 402 | image: 4D float32 tensor with shape [batch_size, image_height, 403 | image_width, channel_count]. The batch of images to tone map. 404 | gamma: 0D float32 nonnegative tensor. Values of gamma below 1 compress 405 | relative contrast in the image, and values above one increase it. 406 | A value of 1 is equivalent to scaling the image to have a max value 407 | of 1. 408 | Returns: 409 | 4D float32 tensor with shape [batch_size, image_height, image_width, 410 | channel_count]. Contains the gamma-corrected images, clipped to the 411 | range [0, 1]. 
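    Example (illustrative): with gamma = 0.5, an image whose values include
    {0.25, 4.0} is first mapped to {0.5, 2.0}; the per-image max is then 2.0,
    so after rescaling and clipping the output values are {0.25, 1.0}.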
412 | """ 413 | batch_size = image.shape[0] 414 | corrected_image = torch.pow(image, gamma) 415 | image_max = torch.max( 416 | torch.reshape(corrected_image, [batch_size, -1]), 1).values 417 | scaled_image = ( 418 | corrected_image / torch.reshape(image_max, [batch_size, 1, 1, 1])) 419 | return torch.clamp(scaled_image, 0.0, 1.0) 420 | -------------------------------------------------------------------------------- /src/mesh_renderer/test_data/Barycentrics_Cube.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/mesh_renderer/test_data/Barycentrics_Cube.png -------------------------------------------------------------------------------- /src/mesh_renderer/test_data/Colored_Cube_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/mesh_renderer/test_data/Colored_Cube_0.png -------------------------------------------------------------------------------- /src/mesh_renderer/test_data/Colored_Cube_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/mesh_renderer/test_data/Colored_Cube_1.png -------------------------------------------------------------------------------- /src/mesh_renderer/test_data/External_Triangle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/mesh_renderer/test_data/External_Triangle.png -------------------------------------------------------------------------------- /src/mesh_renderer/test_data/Gray_Cube_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/mesh_renderer/test_data/Gray_Cube_0.png -------------------------------------------------------------------------------- /src/mesh_renderer/test_data/Gray_Cube_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/mesh_renderer/test_data/Gray_Cube_1.png -------------------------------------------------------------------------------- /src/mesh_renderer/test_data/Inside_Box.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/mesh_renderer/test_data/Inside_Box.png -------------------------------------------------------------------------------- /src/mesh_renderer/test_data/Perspective_Corrected_Triangle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/mesh_renderer/test_data/Perspective_Corrected_Triangle.png -------------------------------------------------------------------------------- /src/mesh_renderer/test_data/Simple_Tetrahedron.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/mesh_renderer/test_data/Simple_Tetrahedron.png
--------------------------------------------------------------------------------
/src/mesh_renderer/test_data/Simple_Triangle.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/mesh_renderer/test_data/Simple_Triangle.png
--------------------------------------------------------------------------------
/src/mesh_renderer/test_data/Unlit_Cube_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/mesh_renderer/test_data/Unlit_Cube_0.png
--------------------------------------------------------------------------------
/src/mesh_renderer/test_data/Unlit_Cube_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/mesh_renderer/test_data/Unlit_Cube_1.png
--------------------------------------------------------------------------------
/src/mesh_renderer/test_utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | 
5 | import os
6 | import numpy as np
7 | from skimage import io
8 | import torch
9 | from itertools import product
10 | 
11 | 
12 | def check_jacobians_are_nearly_equal(theoretical,
13 |                                      numerical,
14 |                                      outlier_relative_error_threshold,
15 |                                      max_outlier_fraction,
16 |                                      include_jacobians_in_error_message=False):
17 |     """Compare two Jacobian matrices, allowing for some fraction of outliers.
18 | 
19 |     Args:
20 |         theoretical: 2D numpy array containing a Jacobian matrix with entries
21 |             computed via gradient functions. The layout should be as in the output
22 |             of torch.autograd.gradcheck.get_analytical_jacobian.
23 |         numerical: 2D numpy array of the same shape as theoretical containing a
24 |             Jacobian matrix with entries computed via finite difference
25 |             approximations. The layout should be as in the output of
26 |             torch.autograd.gradcheck.get_numerical_jacobian.
27 |         outlier_relative_error_threshold: float prescribing the max relative
28 |             error (from the finite difference approximation) tolerated before an
29 |             entry is considered an outlier.
30 |         max_outlier_fraction: float defining the max fraction of entries in
31 |             theoretical that may be outliers before the check returns False.
32 |         include_jacobians_in_error_message: bool defining whether the jacobian
33 |             matrices should be included in the return message if the test fails.
34 | 
35 |     Returns:
36 |         A tuple (success: bool, error_msg: str).
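    Example (illustrative sketch; `analytical` and `numerical` are 2D numpy
    arrays of matching shape):

        match, msg = check_jacobians_are_nearly_equal(
            analytical, numerical,
            outlier_relative_error_threshold=0.01,
            max_outlier_fraction=0.01)
        assert match, msg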
37 |     """
38 |     outlier_gradients = np.abs(
39 |         numerical - theoretical) / numerical > outlier_relative_error_threshold
40 |     outlier_fraction = (
41 |         np.count_nonzero(outlier_gradients) / np.prod(numerical.shape[:2]))
42 |     jacobians_match = outlier_fraction <= max_outlier_fraction
43 | 
44 |     message = (
45 |         " %f of theoretical gradients are relative outliers, but the maximum "
46 |         "allowable fraction is %f " % (outlier_fraction, max_outlier_fraction))
47 |     if include_jacobians_in_error_message:
48 |         # The gradient checker convention is the typical Jacobian transposed:
49 |         message += ("\nNumerical Jacobian:\n%s\nTheoretical Jacobian:\n%s" %
50 |                     (repr(numerical.T), repr(theoretical.T)))
51 |     return jacobians_match, message
52 | 
53 | 
54 | def get_analytical_jacobian(input, output):
55 |     """Compute the analytical jacobian for a function with a single
56 |     differentiable argument.
57 |     """
58 |     jacobian = torch.zeros(input.numel(), output.numel())
59 |     grad_output = torch.zeros_like(output)
60 |     flat_grad_output = grad_output.view(-1)
61 | 
62 |     for i in range(flat_grad_output.numel()):
63 |         flat_grad_output.zero_()
64 |         flat_grad_output[i] = 1
65 |         d_x = torch.autograd.grad(output, [input], grad_output,
66 |                                   retain_graph=True, allow_unused=True)[0]
67 | 
68 |         if jacobian.numel() != 0:
69 |             if d_x is None:
70 |                 jacobian[:, i].zero_()
71 |             else:
72 |                 d_x_dense = (d_x.to_dense()
73 |                              if not d_x.layout == torch.strided else d_x)
74 |                 assert jacobian[:, i].numel() == d_x_dense.numel()
75 |                 jacobian[:, i] = d_x_dense.contiguous().view(-1)
76 | 
77 |     return jacobian
78 | 
79 | 
80 | def get_numerical_jacobian(fn, input, eps=1e-3):
81 |     """Compute the numerical Jacobian using finite differences.
82 | 
83 |     Args:
84 |         fn: The function to differentiate.
85 |         input: input to `fn`
86 |         eps: Finite difference epsilon.
87 |     """
88 |     output_size = fn(input).numel()
89 |     jacobian = torch.zeros(input.numel(), output_size)
90 |     x_tensor = input.data
91 |     d_tensor = jacobian
92 |     for d_idx, x_idx in enumerate(product(*[range(m) for m in x_tensor.size()])):
93 |         orig = x_tensor[x_idx].item()
94 |         x_tensor[x_idx] = orig - eps
95 |         outa = fn(input).clone()
96 |         x_tensor[x_idx] = orig + eps
97 |         outb = fn(input).clone()
98 |         x_tensor[x_idx] = orig
99 |         r = (outb - outa) / (2 * eps)
100 |         d_tensor[d_idx] = r.detach().reshape(-1)
101 | 
102 |     return jacobian
103 | 
104 | 
105 | def expect_image_file_and_render_are_near(test_instance,
106 |                                           baseline_path,
107 |                                           result_image,
108 |                                           max_outlier_fraction=0.001,
109 |                                           pixel_error_threshold=0.01):
110 |     """Compares the output of mesh_renderer with an image on disk.
111 | 
112 |     The comparison is soft: the images are considered identical if at most
113 |     max_outlier_fraction of the pixels differ by more than a relative error of
114 |     pixel_error_threshold of the full color value. Note that before comparison,
115 |     mesh renderer values are clipped to the range [0,1].
116 | 
117 |     On failure, writes the result and diff images under /tmp for inspection.
118 | 
119 |     Args:
120 |         test_instance: a python unittest.TestCase instance.
121 |         baseline_path: path to the reference image on disk.
122 |         result_image: the result image, as a Tensor.
123 |         max_outlier_fraction: the maximum fraction of outlier pixels allowed.
124 |         pixel_error_threshold: pixel values are considered different if their
125 |             difference exceeds this amount. Range is 0.0 - 1.0.
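    Example (sketch mirroring how the renderer tests call this helper):

        test_utils.expect_image_file_and_render_are_near(
            self,
            os.path.join(self.test_data_directory, "Simple_Triangle.png"),
            image)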
126 |     """
127 |     baseline_image = io.imread(baseline_path)
128 | 
129 |     test_instance.assertEqual(baseline_image.shape, result_image.shape,
130 |                               "Image shapes {} and {} do not match."
131 |                               .format(baseline_image.shape, result_image.shape))
132 | 
133 |     result_image = result_image.numpy()
134 |     result_image = np.clip(result_image, 0., 1.).copy(order="C")
135 |     baseline_image = baseline_image.astype(float) / 255.0
136 | 
137 |     diff_image = np.abs(baseline_image - result_image)
138 |     outlier_channels = diff_image > pixel_error_threshold
139 |     outlier_pixels = np.any(outlier_channels, axis=2)
140 |     outlier_count = np.count_nonzero(outlier_pixels)
141 |     outlier_fraction = outlier_count / np.prod(baseline_image.shape[:2])
142 |     images_match = outlier_fraction <= max_outlier_fraction
143 | 
144 |     outputs_dir = "/tmp"  # os.environ["TEST_TMPDIR"]
145 |     base_prefix = os.path.splitext(os.path.basename(baseline_path))[0]
146 |     result_output_path = os.path.join(outputs_dir, base_prefix + "_result.png")
147 |     diff_output_path = os.path.join(outputs_dir, base_prefix + "_diff.png")
148 | 
149 |     message = ("{} does not match. ({} of pixels are outliers; {} is allowed.)"
150 |                " Result image written to {}, diff written to {}"
151 |                .format(
152 |                    baseline_path, outlier_fraction,
153 |                    max_outlier_fraction, result_output_path, diff_output_path))
154 | 
155 |     if not images_match:
156 |         io.imsave(result_output_path, (result_image * 255.0).astype(np.uint8))
157 |         diff_image[:, :, 3] = 1.0
158 |         io.imsave(diff_output_path, (diff_image * 255.0).astype(np.uint8))
159 | 
160 |     test_instance.assertTrue(images_match, msg=message)
--------------------------------------------------------------------------------
/src/soft_mesh_renderer/README.md:
--------------------------------------------------------------------------------
1 | # soft_mesh_renderer
2 | 
3 | This package contains a differentiable 3D mesh renderer using the probabilistic rasterization formulation by [Liu et al. 2019 "Soft Rasterizer: A Differentiable Renderer for Image-based 3D Reasoning"](https://arxiv.org/abs/1904.01786). It is an alternate implementation of [SoftRas](https://github.com/ShichenLiu/SoftRas) that I built for my own learning. Compare also the implementation from [PyTorch3D](https://github.com/facebookresearch/pytorch3d).
4 | 
5 | The renderer supports rendering textured triangle meshes to images with diffuse phong shading including multiple lights. Gradients of the image RGBA pixels can be obtained with respect to mesh vertices, texture colors, camera parameters, and lights.
6 | 
7 | The code is unoptimized: it is Python-only, while the original implements the forward and backward passes with dedicated CUDA kernels. I hope it is more readable, though, and that others will find it useful.
8 | 
9 | # Testing
10 | 
11 | Run from the repository root:
12 | ```
13 | python -m src.soft_mesh_renderer.test_rasterize
14 | ```
15 | 
16 | # Usage
17 | 
18 | ## Rendering a shaded mesh
19 | 
20 | Rendering a shaded mesh can be done with the `render` function in `soft_mesh_renderer/render.py`. This function renders an input scene (mesh, lights, and camera) using phong shading, and returns an output image.
21 | 
22 | #### Args:
23 | 
24 | - `vertices`: 3D float32 tensor with shape `[batch_size, vertex_count, 3]`. Each triplet is an xyz position in world space.
25 | - `triangles`: 2D int32 tensor with shape `[triangle_count, 3]`.
Each triplet should contain vertex indices describing a triangle such that the triangle's normal points toward the viewer if the forward order of the triplet defines a counter-clockwise winding of the vertices. Gradients with respect to this tensor are not available.
26 | - `diffuse_colors`: 3D float32 tensor with shape `[batch_size, vertex_count, 3]`. The RGB diffuse reflection in the range `[0, 1]` for each vertex.
27 | - `camera_position`: 2D tensor with shape `[batch_size, 3]` or 1D tensor with shape `[3]` specifying the XYZ world space camera position.
28 | - `camera_lookat`: 2D tensor with shape `[batch_size, 3]` or 1D tensor with shape `[3]` containing an XYZ point along the center of the camera's gaze.
29 | - `camera_up`: 2D tensor with shape `[batch_size, 3]` or 1D tensor with shape
30 | `[3]` containing the up direction for the camera. The camera will have no tilt with respect to this direction.
31 | - `light_positions`: a 3D tensor with shape `[batch_size, light_count, 3]`. The world space XYZ position of each light in the scene.
32 | - `light_intensities`: a 2D tensor with shape `[batch_size, light_count]`. The intensity values for each light. Intensities may be above 1.
33 | - `image_width`: int specifying desired output image width in pixels.
34 | - `image_height`: int specifying desired output image height in pixels.
35 | - `sigma_val`: parameter controlling the sharpness of the coverage distribution for a single triangle. A smaller sigma leads to a sharper distribution.
36 | - `gamma_val`: temperature parameter controlling uniformity of the triangle probability distribution for a pixel in the depth aggregation. When gamma is 0, all probability mass will fall into the triangle with highest z, matching the behavior of z-buffering.
37 | - `fov_y`: float, 0D tensor, or 1D tensor with shape `[batch_size]` specifying desired output image y field of view in degrees.
38 | - `near_clip`: float, 0D tensor, or 1D tensor with shape `[batch_size]` specifying near clipping plane distance.
39 | - `far_clip`: float, 0D tensor, or 1D tensor with shape `[batch_size]` specifying far clipping plane distance.
40 | 
41 | #### Returns:
42 | 
43 | A 4D float32 tensor of shape `[batch_size, image_height, image_width, 4]` containing the lit RGBA color values for each image at each pixel.
44 | - The RGB values are aggregated per-pixel according to the color aggregation formula in [1].
45 | - The alpha values are aggregated per-pixel according to the silhouette formula in [1].
46 | 
47 | [1] Shichen Liu et al, 'Soft Rasterizer: A Differentiable Renderer for Image-based 3D Reasoning'
48 | 
49 | ### Example
50 | 
51 | An example usage of the differentiable mesh renderer to render a teapot can be seen in [`src/examples/example1b.py`](https://github.com/andrewkchan/pytorch_mesh_renderer/blob/master/src/examples/example1b.py).
--------------------------------------------------------------------------------
/src/soft_mesh_renderer/__init__.py:
--------------------------------------------------------------------------------
1 | from .render import render
--------------------------------------------------------------------------------
/src/soft_mesh_renderer/quadtree.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | def intersects(bbox1, bbox2):
4 |     """
5 |     Returns whether two two-dimensional bounding boxes intersect.
6 | 
7 |     Args:
8 |     - bbox1: Tensor of shape [2, 2] where bbox1[0] gives the xy-coordinate
9 |       of the top-left corner and bbox1[1] gives the bottom-right corner.
10 |     - bbox2: Same format as bbox1.
11 | 
12 |     Returns:
13 |     - A boolean indicating whether the bounding boxes intersect.
14 |     """
15 |     return (bbox1[0, 0] <= bbox2[1, 0] and bbox1[1, 0] >= bbox2[0, 0] and
16 |             bbox1[0, 1] <= bbox2[1, 1] and bbox1[1, 1] >= bbox2[0, 1])
17 | 
18 | def contains(bbox, p):
19 |     """
20 |     Returns whether a bounding box contains a 2D point p.
21 | 
22 |     Args:
23 |     - bbox: Tensor of shape [2, 2] where bbox[0] gives the xy-coordinate
24 |       of the top-left corner and bbox[1] gives the bottom-right corner.
25 |     - p: Tensor of shape [2].
26 | 
27 |     Returns:
28 |     - A boolean indicating whether bbox contains p.
29 |     """
30 |     return (p[0] <= bbox[1][0] and p[0] >= bbox[0][0] and
31 |             p[1] <= bbox[1][1] and p[1] >= bbox[0][1])
32 | 
33 | """
34 | Quadtree data structure to store geometric data with associated bounding boxes.
35 | """
36 | MAX_DEPTH = 5
37 | class QuadTreeNode:
38 |     def __init__(self, bbox, depth):
39 |         self.bbox = bbox
40 |         self.depth = depth
41 |         self.data = []
42 |         self.children = []
43 | 
44 |     def insert(self, bbox, data):
45 |         if len(self.children) != 0:
46 |             for child in self.children:
47 |                 if intersects(child.bbox, bbox):
48 |                     child.insert(bbox, data)
49 |         else:
50 |             if len(self.data) != 0 and self.depth < MAX_DEPTH:
51 |                 # subdivide
52 |                 next_depth = self.depth + 1
53 |                 top = self.bbox[0][1]
54 |                 left = self.bbox[0][0]
55 |                 right = self.bbox[1][0]
56 |                 bottom = self.bbox[1][1]
57 | 
58 |                 center = (self.bbox[0] + self.bbox[1]) / 2.
59 |                 self.children = [
60 |                     # top-left
61 |                     QuadTreeNode(
62 |                         torch.stack([
63 |                             self.bbox[0],
64 |                             center
65 |                         ]), next_depth),
66 |                     # top-right
67 |                     QuadTreeNode(
68 |                         torch.stack([
69 |                             torch.tensor([center[0], top]),
70 |                             torch.tensor([right, center[1]])
71 |                         ]), next_depth),
72 |                     # bottom-left
73 |                     QuadTreeNode(
74 |                         torch.stack([
75 |                             torch.tensor([left, center[1]]),
76 |                             torch.tensor([center[0], bottom])
77 |                         ]), next_depth),
78 |                     # bottom-right
79 |                     QuadTreeNode(
80 |                         torch.stack([
81 |                             center,
82 |                             self.bbox[1]
83 |                         ]), next_depth),
84 |                 ]
85 |                 self.data.append((bbox, data))
86 |                 for d_bbox, d in self.data:
87 |                     for child in self.children:
88 |                         if intersects(child.bbox, d_bbox):
89 |                             child.insert(d_bbox, d)
90 |                 self.data = []
91 |             else:
92 |                 self.data.append((bbox, data))
93 | 
94 |     def leaf_for_point(self, p):
95 |         if not contains(self.bbox, p):
96 |             return None
97 |         for child in self.children:
98 |             l = child.leaf_for_point(p)
99 |             if l:
100 |                 return l
101 |         return self
102 | 
--------------------------------------------------------------------------------
/src/soft_mesh_renderer/rasterize.py:
--------------------------------------------------------------------------------
1 | """
2 | Differentiable triangle rasterizer using soft rasterization formulation.
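
In the notation of Liu et al. 2019, each triangle j contributes to pixel i
with probability

    D_j(i) = sigmoid(delta_ij * d^2(i, j) / sigma),

where d(i, j) is the screen-space distance from the pixel to the triangle's
boundary and delta_ij is +1 if the pixel is inside the triangle and -1
otherwise. Colors are then aggregated across triangles with a depth-weighted
softmax whose temperature is gamma (see rasterize() below).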
3 | """
4 | 
5 | from __future__ import absolute_import
6 | from __future__ import division
7 | from __future__ import print_function
8 | 
9 | import torch
10 | 
11 | from ..common import camera_utils
12 | from .quadtree import QuadTreeNode, contains
13 | 
14 | def rasterize(
15 |     world_space_vertices,
16 |     triangles,
17 |     ### vertex attributes
18 |     normals,
19 |     diffuse_colors,
20 |     ### lighting
21 |     light_positions,
22 |     light_intensities,
23 |     ###
24 |     camera_matrices,
25 |     image_width,
26 |     image_height,
27 |     sigma_val,
28 |     gamma_val,
29 |     blur_radius=0.01
30 | ):
31 |     """
32 |     Soft-rasterize a mesh, interpolating vertex attributes, lighting with phong shading,
33 |     and soft-aggregating the result for every pixel.
34 | 
35 |     Args:
36 |         world_space_vertices: 3D float32 tensor of xyz positions with shape
37 |             [batch_size, vertex_count, 3].
38 |         triangles: 2D int32 tensor with shape [triangle_count, 3]. Each triplet
39 |             should contain vertex indices describing a triangle such that the
40 |             triangle's normal points toward the viewer if the forward order of
41 |             the triplet defines a counter-clockwise winding of the vertices. Gradients
42 |             with respect to this tensor are not available.
43 | 
44 |         normals: 3D float32 tensor with shape [batch_size, vertex_count, 3]. Each
45 |             triplet is the xyz vertex normal for its corresponding vertex. Each
46 |             vector is assumed to be already normalized.
47 |         diffuse_colors: 3D float32 tensor with shape [batch_size,
48 |             vertex_count, 3]. The RGB diffuse reflection in the range [0, 1] for
49 |             each vertex.
50 | 
51 |         light_positions: a 3D tensor with shape [batch_size, light_count, 3]. The
52 |             world space XYZ position of each light in the scene.
53 |         light_intensities: a 2D tensor with shape [batch_size, light_count].
54 |             The intensity values for each light. Intensities may be above 1.
55 | 
56 |         camera_matrices: 3D float tensor with shape [batch_size, 4, 4] containing
57 |             model-view-perspective projection matrices.
58 |         image_width: int specifying desired output image width in pixels.
59 |         image_height: int specifying desired output image height in pixels.
60 |         sigma_val: parameter controlling the sharpness of the coverage distribution
61 |             for a single triangle. A smaller sigma leads to a sharper distribution.
62 |         gamma_val: temperature parameter controlling uniformity of the triangle
63 |             probability distribution for a pixel in the depth aggregation.
64 |             When gamma is 0, all probability mass will fall into the triangle
65 |             with highest z, matching the behavior of z-buffering.
66 |         blur_radius: float specifying the cutoff radius of soft-rasterization sampling
67 |             in NDC-space.
68 | 
69 |     Returns:
70 |         A 4D float32 tensor of shape [batch_size, image_height, image_width, 4]
71 |         containing the lit RGBA color values for each image at each pixel.
72 |         The RGB values are aggregated per-pixel according to the color aggregation
73 |         formula in [1].
74 |         The alpha values are aggregated per-pixel according to the silhouette
75 |         formula in [1].
76 | 
77 |     [1] Shichen Liu et al, 'Soft Rasterizer: A Differentiable Renderer for
78 |     Image-based 3D Reasoning'
79 | 
80 |     Raises:
81 |         ValueError: An invalid argument to the method is detected.
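    Example (a minimal sketch; the sigma_val/gamma_val values here are only a
    plausible starting point -- small values on the order of 1e-5 to 1e-4 are
    commonly used with SoftRas-style renderers, and may need tuning):

        images = rasterize(
            world_space_vertices, triangles, normals, diffuse_colors,
            light_positions, light_intensities, camera_matrices,
            image_width=64, image_height=64,
            sigma_val=1e-4, gamma_val=1e-4)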
82 | """ 83 | vertex_count = world_space_vertices.shape[1] 84 | batch_size = world_space_vertices.shape[0] 85 | 86 | clip_space_vertices = camera_utils.transform_homogeneous( 87 | camera_matrices, world_space_vertices) 88 | 89 | batch_images = [] 90 | 91 | for b in range(batch_size): 92 | image = rasterize_batch( 93 | clip_space_vertices[b, :, :], 94 | triangles, 95 | ### vertex attributes 96 | world_space_vertices[b, :, :], 97 | normals[b, :, :], 98 | diffuse_colors[b, :, :], 99 | ### lighting 100 | light_positions[b, :, :], 101 | light_intensities[b, :], 102 | ### 103 | image_width, 104 | image_height, 105 | sigma_val, 106 | gamma_val 107 | ) 108 | batch_images.append(image) 109 | 110 | return torch.stack(batch_images, 0) 111 | 112 | # Returns the signed area of the parallelogram 113 | # with edges v0p and v01. All inputs should be tensors 114 | # of shape [2] or [3]. 115 | # 116 | # The area is positive if point p is on the right side 117 | # of the segment going from v0 to v1 (so that [p, v0, v1] 118 | # winds clockwise) and negative if p is on the left (so 119 | # that [p, v0, v1] winds counter-clockwise). 120 | def edge_function(p, v0, v1): 121 | v0p = p - v0 122 | v01 = v1 - v0 123 | return v0p[0] * v01[1] - v0p[1] * v01[0] 124 | 125 | # Returns barycentric coordinates of a 3D point P w.r.t. triangle v0, v1, v2. 126 | # The input `M_inv` should be the inverse of a 3x3 matrix where the columns are the vertices. 127 | def barycentric(M_inv, p): 128 | return M_inv @ p 129 | 130 | # Returns barycentric coordinates of a point P (in homogeneous 3D coordinates xyz) 131 | # w.r.t. triangle v0, v1, v2, the same for the point on the edge of the triangle nearest to P, 132 | # and the distance between them. 133 | # Args: 134 | # - p: 3D point, a tensor with shape [3]. 135 | # - M: A 3x3 matrix where the columns are the vertices v0, v1, v2 of the triangle. 136 | # - M_inv: The inverse of M. 137 | # 138 | # Returns: 139 | # - bc_p: 1D tensor of shape [3] giving barycentric coordinates for p. 140 | # If p is outside the triangle, one of the coordinates will be negative. 141 | # - mindist_sq: scalar tensor (float) giving the squared distance from p to the nearest point. 142 | # - bc_edge: 1D tensor of shape [3] giving barycentric coordinates for the nearest point 143 | # on the edge of the triangle. 144 | def barycentric_edge(M, M_inv, p): 145 | bc_p = barycentric(M_inv, p) 146 | v01_nearest, t01 = point_to_segment_nearest(p[:2], M[:, 0][:2], M[:, 1][:2]) 147 | v12_nearest, t12 = point_to_segment_nearest(p[:2], M[:, 1][:2], M[:, 2][:2]) 148 | v20_nearest, t20 = point_to_segment_nearest(p[:2], M[:, 2][:2], M[:, 0][:2]) 149 | d = torch.stack([v01_nearest, v12_nearest, v20_nearest]) - p[:2] 150 | mindist_sq, argmin = torch.min(torch.sum(d * d, dim=-1), dim=0) 151 | if argmin == 0: 152 | return bc_p, mindist_sq, torch.stack([1. - t01, t01, torch.tensor(0.)]) 153 | elif argmin == 1: 154 | return bc_p, mindist_sq, torch.stack([torch.tensor(0.), 1. - t12, t12]) 155 | else: 156 | return bc_p, mindist_sq, torch.stack([t20, torch.tensor(0.), 1. - t20]) 157 | 158 | # Returns the point on a 2D line segment which is nearest to the input point, 159 | # and the number t between [0, 1] giving how far that is on the segment. 160 | # 161 | # Args: 162 | # - p: 2D point, a tensor with shape [2] that we want to project on the line segment. 163 | # - a: 2D point, a tensor with shape [2]. Start of the line segment. 164 | # - b: 2D point, a tensor with shape [2]. End of the line segment. 
165 | #
166 | # Returns:
167 | # - x: 2D point, the point on the line segment nearest p.
168 | # - t: Number in [0, 1] giving the normalized distance from `a` to `x`.
169 | def point_to_segment_nearest(p, a, b):
170 |     ab = b - a
171 |     len_ab = torch.linalg.vector_norm(ab, ord=2)
172 |     n = ab / max(len_ab, 1e-12)
173 |     proj_p_n = torch.dot(p - a, n) * n
174 |     t = torch.clamp(torch.dot(proj_p_n, n) / len_ab, 0., 1.)
175 |     x = a + t * ab
176 |     return x, t
177 | 
178 | # Interpolates the per-vertex diffuse colors of the triangle at the given
179 | # barycentric coordinates, then returns the corresponding RGB color with
180 | # Phong shading (diffuse term only) applied to it.
181 | # Returns:
182 | # - a tensor of shape [3] giving the lit RGB value for this pixel
183 | def compute_shaded_color(
184 |     bc,
185 |     triangle,
186 |     ### vertex attributes
187 |     world_space_vertices,
188 |     normals,
189 |     diffuse_colors,
190 |     ### lighting
191 |     light_positions,
192 |     light_intensities,
193 | ):
194 |     light_count = len(light_positions)
195 |     diffuse_color = bc @ diffuse_colors[triangle, :] # [3]
196 |     p = bc @ world_space_vertices[triangle, :] # [3]
197 |     n = torch.nn.functional.normalize(bc @ normals[triangle, :], p=2, dim=-1) # [3]
198 |     dirs_to_lights = torch.nn.functional.normalize(
199 |         light_positions - p, p=2, dim=-1) # [light_count, 3]
200 | 
201 |     # Surfaces should only be illuminated when the light and normal face
202 |     # one another (i.e. the dot product is non-negative).
203 |     normals_dot_lights = torch.clamp(
204 |         torch.sum(dirs_to_lights * n, dim=-1),
205 |         0.0, 1.0) # [light_count]
206 |     diffuse_output = diffuse_color * torch.sum(normals_dot_lights * light_intensities, dim=-1) # [3]
207 | 
208 |     return diffuse_output
209 | 
210 | SHOW_DEBUG_LOGS = False
211 | EPS = 1e-10 # used to give background color a constant small probability
212 | def rasterize_batch(
213 |     clip_space_vertices,
214 |     triangles,
215 |     ### vertex attributes
216 |     world_space_vertices,
217 |     normals,
218 |     diffuse_colors,
219 |     ### lighting
220 |     light_positions,
221 |     light_intensities,
222 |     ###
223 |     image_width,
224 |     image_height,
225 |     sigma_val,
226 |     gamma_val,
227 |     blur_radius=0.01
228 | ):
229 |     """
230 |     Soft-rasterize a mesh already transformed to clip space.
231 |     Non-batched function.
232 | 
233 |     Args:
234 |         clip_space_vertices: 2D float32 tensor of homogeneous vertices (xyzw)
235 |             with shape [vertex_count, 4].
236 |         triangles: 2D int32 tensor with shape [triangle_count, 3]. Each triplet
237 |             should contain vertex indices describing a triangle such that the
238 |             triangle's normal points toward the viewer if the forward order of
239 |             the triplet defines a counter-clockwise winding of the vertices. Gradients
240 |             with respect to this tensor are not available.
241 | 
242 |         world_space_vertices: 2D float32 tensor of xyz positions with shape
243 |             [vertex_count, 3].
244 |         normals: 2D float32 tensor with shape [vertex_count, 3]. Each
245 |             triplet is the xyz vertex normal for its corresponding vertex. Each
246 |             vector is assumed to be already normalized.
247 |         diffuse_colors: 2D float32 tensor with shape [vertex_count, 3]. The RGB
248 |             diffuse reflection in the range [0, 1] for each vertex.
249 | 
250 |         light_positions: a 2D tensor with shape [light_count, 3]. The world space
251 |             XYZ position of each light in the scene.
252 |         light_intensities: a 1D tensor with shape [light_count].
253 |             The intensity values for each light. Intensities may be above 1.
254 | 
255 |         image_width: int specifying desired output image width in pixels.
256 | image_height: int specifying desired output image height in pixels. 257 | sigma_val: parameter controlling the sharpness of the coverage distribution 258 | for a single triangle. A smaller sigma leads to a sharper distribution. 259 | gamma_val: temperature parameter controlling uniformity of the triangle 260 | probability distribution for a pixel in the depth aggregation. 261 | When gamma is 0, all probability mass will fall into the triangle 262 | with highest z, matching the behavior of z-buffering. 263 | blur_radius: float specifying the cutoff radius of soft-rasterization sampling 264 | in NDC-space. 265 | 266 | Returns: 267 | A 3D float32 tensor of shape [image_height, image_width, 4] 268 | containing the lit RGBA color values at each pixel. 269 | The RGB values are aggregated per-pixel according to the color aggregation 270 | formula in [1]. 271 | The alpha values are aggregated per-pixel according to the silhouette 272 | formula in [1]. 273 | 274 | [1] Shichen Liu et al, 'Soft Rasterizer: A Differentiable Renderer for 275 | Image-based 3D Reasoning' 276 | """ 277 | sq_blur_radius = blur_radius**2 278 | result = torch.zeros([image_height, image_width, 4], dtype=torch.float32) 279 | 280 | ndc_face_matrices = torch.zeros([len(triangles), 3, 3], dtype=torch.float32) 281 | ndc_2d_face_matrices_inv = torch.zeros([len(triangles), 3, 3], dtype=torch.float32) 282 | ndc_face_areas = torch.zeros([len(triangles)], dtype=torch.float32) 283 | quadtree = QuadTreeNode(torch.tensor([[-1., -1.], [1., 1.]]), 0) 284 | for i in range(len(triangles)): 285 | triangle = triangles[i] 286 | clip_v012 = clip_space_vertices[triangle] # shape: [3, 4] 287 | clip_v012_w = clip_v012[:, [3]] # shape: [3, 1] 288 | 289 | ndc_M = (clip_v012 / (clip_v012_w)).T[:3, :] # [3, 3], each column is a vertex 290 | ndc_face_matrices[i, :, :] = ndc_M 291 | 292 | ndc_2d_M = ndc_M.clone() 293 | ndc_2d_M[2, :] = torch.tensor([1., 1., 1.]) 294 | try: 295 | ndc_2d_M_inv = ndc_2d_M.inverse() 296 | except Exception: 297 | # NDC-space vertex basis is not invertible, meaning triangle is 298 | # degenerate when projected (zero area). 299 | continue 300 | ndc_2d_face_matrices_inv[i, :, :] = ndc_2d_M_inv 301 | ndc_face_areas[i] = edge_function(ndc_M[:, 0], ndc_M[:, 1], ndc_M[:, 2]) 302 | ndc_bbox = torch.tensor([ 303 | [torch.min(ndc_M[0, :]) - blur_radius, torch.min(ndc_M[1, :]) - blur_radius], 304 | [torch.max(ndc_M[0, :]) + blur_radius, torch.max(ndc_M[1, :]) + blur_radius] 305 | ]) 306 | quadtree.insert(ndc_bbox, i) 307 | 308 | total_samples = 0 309 | for y in range(image_height): 310 | 311 | row_samples_drawn = 0 312 | row_max_samples_drawn = 0 313 | 314 | for x in range(image_width): 315 | ndc_x = 2.0 * ((x + 0.5) / image_width) - 1.0 316 | ndc_y = -2.0 * ((y + 0.5) / image_height) + 1.0 # invert y 317 | ndc_p = torch.tensor([ndc_x, ndc_y, 1.0]) 318 | 319 | soft_weights = torch.zeros([len(triangles)]) 320 | soft_fragments = torch.zeros([len(triangles)]) 321 | soft_colors = torch.zeros([len(triangles), 3]) 322 | 323 | samples_drawn = 0 324 | for triangle_bbox, i in quadtree.leaf_for_point(ndc_p[:2]).data: 325 | triangle = triangles[i] 326 | 327 | clip_v012 = clip_space_vertices[triangle] # shape: [3, 4] 328 | clip_v012_w = clip_v012[:, [3]] # shape: [3, 1] 329 | ndc_M = ndc_face_matrices[i] # [3, 3] 330 | ndc_depths = ndc_M.T[:, [2]] # [3, 1] 331 | if ndc_face_areas[i] > 0: 332 | # Back-face culling: skip triangles facing away from the camera. 
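                    # (Note: with the counter-clockwise-toward-viewer winding
                    # assumed by this renderer, edge_function() gives a
                    # front-facing triangle a negative signed area in NDC, so a
                    # positive area means the face points away from the camera.)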
333 |                     continue
334 |                 elif ndc_face_areas[i] == 0:
335 |                     # Skip degenerate triangles with zero area.
336 |                     continue
337 |                 ndc_2d_M_inv = ndc_2d_face_matrices_inv[i]
338 | 
339 |                 # fast distance culling: check if pixel is outside the
340 |                 # triangle's bounding box inflated by blur_radius
341 |                 if not contains(triangle_bbox, ndc_p[:2]):
342 |                     continue
343 |                 bc_screen, sq_dist, bc_edge_screen = barycentric_edge(
344 |                     # Note: ndc_2d_M_inv is the inverse of `ndc_M` with uniform z-components,
345 |                     # not `ndc_M` itself. This is ok because we only use the `M` matrix in
346 |                     # this function to extract the x and y components of face vertices.
347 |                     ndc_M,
348 |                     ndc_2d_M_inv,
349 |                     ndc_p
350 |                 )
351 |                 is_inside = not torch.any(bc_screen < 0.)
352 | 
353 |                 # slow distance culling: check if pixel is too far from sample point
354 |                 if not is_inside and sq_dist > sq_blur_radius:
355 |                     continue
356 | 
357 |                 # Get perspective-correct barycentric coordinates for the point to sample from
358 |                 # by undoing the perspective projection on the screen-space barycentrics.
359 |                 sample_bc = torch.nn.functional.normalize(
360 |                     # If p is inside the triangle, sample from p itself.
361 |                     # Otherwise, sample from the point inside the triangle nearest to p.
362 |                     (bc_screen if is_inside else bc_edge_screen)
363 |                     / clip_v012_w.T[0],
364 |                     dim=0, p=1
365 |                 ) # [3]
366 | 
367 |                 # Get normalized depth of nearest points in NDC-space.
368 |                 z = sample_bc @ ndc_depths # Range [-1, +1] where -1 is near plane
369 |                 # Map to range (0, 1) where 1.0 is near plane, 0.0 is far plane
370 |                 z = 0.5 - z/2.
371 | 
372 |                 if z < 0.0 or z > 1.0:
373 |                     # Sample point is outside the view volume, so skip it.
374 |                     continue
375 | 
376 |                 soft_colors[i, :3] = compute_shaded_color(
377 |                     sample_bc,
378 |                     triangle,
379 |                     ### vertex attributes
380 |                     world_space_vertices,
381 |                     normals,
382 |                     diffuse_colors,
383 |                     ### lighting
384 |                     light_positions,
385 |                     light_intensities,
386 |                 )
387 | 
388 |                 sgn = 1. if is_inside else -1.
389 |                 soft_fragments[i] = torch.special.expit(sgn * sq_dist / sigma_val)
390 | 
391 |                 # Set these equal to the un-exponentiated logits.
392 |                 # We shouldn't exponentiate until we can adjust the maximum value
393 |                 # below to avoid overflow.
394 |                 soft_weights[i] = z / gamma_val
395 |                 samples_drawn += 1
396 | 
397 |             max_soft_weight = max(torch.max(soft_weights), torch.tensor(EPS / gamma_val))
398 |             soft_weights = soft_fragments * torch.exp(soft_weights - max_soft_weight)
399 | 
400 |             # background weight should never be zero.
401 |             bg_weight = max(torch.exp(EPS / gamma_val - max_soft_weight), EPS)
402 | 
403 |             # normalize all logits
404 |             sum_weights = torch.sum(soft_weights) + bg_weight
405 |             soft_weights = soft_weights / sum_weights
406 | 
407 |             # bg color is transparent, otherwise we'd add `(bg_weight / sum_weights) * bg_color`
408 |             result[y][x][:3] = soft_weights @ soft_colors
409 | 
410 |             # Compute the silhouette score, which is based on the probability that
411 |             # at least 1 triangle covers the pixel. This is 1 - probability that
412 |             # all triangles do not cover the pixel.
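            # Treating each soft_fragments[i] as an independent per-triangle
            # coverage probability D_i, this is the SoftRas silhouette formula
            # from [1]: silhouette = 1 - prod_i (1 - D_i).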
413 | silhouette = 1.0 - torch.prod((1.0 - soft_fragments)) 414 | result[y][x][3] = silhouette 415 | 416 | row_samples_drawn += samples_drawn 417 | row_max_samples_drawn = max(row_max_samples_drawn, samples_drawn) 418 | total_samples += samples_drawn 419 | if SHOW_DEBUG_LOGS: 420 | print("drew {} samples (max={}) for row y={}".format(row_samples_drawn, row_max_samples_drawn, y)) 421 | if SHOW_DEBUG_LOGS: 422 | print("drew {} samples total".format(total_samples)) 423 | 424 | return result -------------------------------------------------------------------------------- /src/soft_mesh_renderer/render.py: -------------------------------------------------------------------------------- 1 | """ 2 | Differentiable 3D rendering of a triangle mesh based on 3 | the soft rasterization formulation from Liu 2019. 4 | """ 5 | 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import torch 11 | 12 | from ..common import camera_utils, meshes 13 | from .rasterize import rasterize 14 | 15 | def render( 16 | vertices, 17 | triangles, 18 | diffuse_colors, 19 | camera_position, 20 | camera_lookat, 21 | camera_up, 22 | light_positions, 23 | light_intensities, 24 | image_width, 25 | image_height, 26 | sigma_val=1e-5, 27 | gamma_val=1e-4, 28 | blur_radius=0.01, 29 | fov_y=40.0, 30 | near_clip=0.01, 31 | far_clip=10.0): 32 | """Soft-renders an input scene using phong shading, and returns an output image. 33 | 34 | Args: 35 | vertices: 3D float32 tensor with shape [batch_size, vertex_count, 3]. Each 36 | triplet is an xyz position in world space. 37 | triangles: 2D int32 tensor with shape [triangle_count, 3]. Each triplet 38 | should contain vertex indices describing a triangle such that the 39 | triangle's normal points toward the viewer if the forward order of the 40 | triplet defines a counter-clockwise winding of the vertices. Gradients with 41 | respect to this tensor are not available. 42 | diffuse_colors: 3D float32 tensor with shape [batch_size, 43 | vertex_count, 3]. The RGB diffuse reflection in the range [0, 1] for 44 | each vertex. 45 | camera_position: 2D tensor with shape [batch_size, 3] or 1D tensor with 46 | shape [3] specifying the XYZ world space camera position. 47 | camera_lookat: 2D tensor with shape [batch_size, 3] or 1D tensor with 48 | shape [3] containing an XYZ point along the center of the camera's gaze. 49 | camera_up: 2D tensor with shape [batch_size, 3] or 1D tensor with shape 50 | [3] containing the up direction for the camera. The camera will have 51 | no tilt with respect to this direction. 52 | light_positions: a 3D tensor with shape [batch_size, light_count, 3]. The 53 | world space XYZ position of each light in the scene. 54 | light_intensities: a 3D tensor with shape [batch_size, light_count]. 55 | The intensity values for each light. Intensities may be above 1. 56 | image_width: int specifying desired output image width in pixels. 57 | image_height: int specifying desired output image height in pixels. 58 | sigma_val: parameter controlling the sharpness of the coverage distribution 59 | for a single triangle. A smaller sigma leads to a sharper distribution. 60 | gamma_val: temperature parameter controlling uniformity of the triangle 61 | probability distribution for a pixel in the depth aggregation. 62 | When gamma is 0, all probability mass will fall into the triangle 63 | with highest z, matching the behavior of z-buffering. 
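            (The default of 1e-4 is small enough to closely approximate hard
            z-buffering; larger values blend in color from occluded triangles.)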
64 | blur_radius: float specifying the cutoff radius of soft-rasterization sampling 65 | in NDC-space. 66 | fov_y: float, 0D tensor, or 1D tensor with shape [batch_size] specifying 67 | desired output image y field of view in degrees. 68 | near_clip: float, 0D tensor, or 1D tensor with shape [batch_size] 69 | specifying near clipping plane distance. 70 | far_clip: float, 0D tensor, or 1D tensor with shape [batch_size] 71 | specifying far clipping plane distance. 72 | 73 | Returns: 74 | A 4D float32 tensor of shape [batch_size, image_height, image_width, 4] 75 | containing the lit RGBA color values for each image at each pixel. 76 | The RGB values are aggregated per-pixel according to the color aggregation 77 | formula in [1]. 78 | The alpha values are aggregated per-pixel according to the silhouette 79 | formula in [1]. 80 | 81 | [1] Shichen Liu et al, 'Soft Rasterizer: A Differentiable Renderer for 82 | Image-based 3D Reasoning' 83 | Raises: 84 | ValueError: An invalid argument to the method is detected. 85 | """ 86 | if len(vertices.shape) != 3 or vertices.shape[-1] != 3: 87 | raise ValueError( 88 | "Vertices must have shape [batch_size, vertex_count, 3].") 89 | if len(triangles.shape) != 2 or triangles.shape[-1] != 3: 90 | raise ValueError( 91 | "Triangles must have shape [triangle_count, 3].") 92 | batch_size = vertices.shape[0] 93 | if len(light_positions.shape) != 3 or light_positions.shape[-1] != 3: 94 | raise ValueError( 95 | "light_positions must have shape [batch_size, light_count, 3].") 96 | if len(light_intensities.shape) != 2: 97 | raise ValueError( 98 | "light_intensities must have shape [batch_size, light_count].") 99 | if len(diffuse_colors.shape) != 3 or diffuse_colors.shape[-1] != 3: 100 | raise ValueError( 101 | "diffuse_colors must have shape [batch_size, vertex_count, 3].") 102 | if list(camera_position.shape) == [3]: 103 | camera_position = torch.unsqueeze(camera_position, 0).repeat(batch_size, 1) 104 | elif list(camera_position.shape) != [batch_size, 3]: 105 | raise ValueError( 106 | "camera_position must have shape [batch_size, 3] or [3].") 107 | if list(camera_lookat.shape) == [3]: 108 | camera_lookat = torch.unsqueeze(camera_lookat, 0).repeat(batch_size, 1) 109 | elif list(camera_lookat.shape) != [batch_size, 3]: 110 | raise ValueError( 111 | "camera_lookat must have shape [batch_size, 3] or [3].") 112 | if list(camera_up.shape) == [3]: 113 | camera_up = torch.unsqueeze(camera_up, 0).repeat(batch_size, 1) 114 | elif list(camera_up.shape) != [batch_size, 3]: 115 | raise ValueError("camera_up must have shape [batch_size, 3] or [3].") 116 | if isinstance(fov_y, float): 117 | fov_y = torch.tensor(batch_size * [fov_y], dtype=torch.float32) 118 | elif len(fov_y.shape) == 0: 119 | fov_y = torch.unsqueeze(fov_y, 0).repeat(batch_size) 120 | elif list(fov_y.shape) != [batch_size]: 121 | raise ValueError("fov_y must be a float, a 0D tensor, or a 1D tensor " 122 | "with shape [batch_size].") 123 | if isinstance(near_clip, float): 124 | near_clip = torch.tensor(batch_size * [near_clip], dtype=torch.float32) 125 | elif len(near_clip.shape) == 0: 126 | near_clip = torch.unsqueeze(near_clip, 0).repeat(batch_size) 127 | elif list(near_clip.shape) != [batch_size]: 128 | raise ValueError("near_clip must be a float, a 0D tensor, or a 1D " 129 | "tensor with shape [batch_size].") 130 | if isinstance(far_clip, float): 131 | far_clip = torch.tensor(batch_size * [far_clip], dtype=torch.float32) 132 | elif len(far_clip.shape) == 0: 133 | far_clip = torch.unsqueeze(far_clip, 
0).repeat(batch_size)
134 |     elif list(far_clip.shape) != [batch_size]:
135 |         raise ValueError("far_clip must be a float, a 0D tensor, or a 1D "
136 |                          "tensor with shape [batch_size].")
137 | 
138 |     camera_matrices = camera_utils.look_at(camera_position, camera_lookat,
139 |                                            camera_up)
140 | 
141 |     perspective_transforms = camera_utils.perspective(
142 |         image_width / image_height,
143 |         fov_y,
144 |         near_clip,
145 |         far_clip)
146 | 
147 |     clip_space_transforms = torch.matmul(perspective_transforms, camera_matrices)
148 |     normals = meshes.compute_vertex_normals(vertices, triangles)
149 | 
150 |     return rasterize(
151 |         vertices,
152 |         triangles,
153 |         ### vertex attributes
154 |         normals,
155 |         diffuse_colors,
156 |         ### lighting
157 |         light_positions,
158 |         light_intensities,
159 |         ###
160 |         clip_space_transforms,
161 |         image_width,
162 |         image_height,
163 |         sigma_val,
164 |         gamma_val, blur_radius  # forward blur_radius so the caller's value isn't silently ignored
165 |     )
--------------------------------------------------------------------------------
/src/soft_mesh_renderer/test_rasterize.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import unittest
4 | 
5 | from .rasterize import rasterize_batch, point_to_segment_nearest
6 | from ..common import debug_utils
7 | 
8 | class RenderTest(unittest.TestCase):
9 |     def test_point_to_segment_nearest(self):
10 |         """
11 |         Test the point_to_segment_nearest function.
12 |         """
13 |         # Test a point that is closest to the middle of the segment.
14 |         point = torch.tensor([1.0, -1.0], dtype=torch.float32)
15 |         segment = torch.tensor([[1.0, 1.0], [-1.0, -1.0]], dtype=torch.float32)
16 |         expected_nearest = torch.tensor([0.0, 0.0], dtype=torch.float32)
17 |         expected_t = 0.5
18 |         nearest, t = point_to_segment_nearest(point, segment[0], segment[1])
19 |         torch.testing.assert_close(expected_nearest, nearest,
20 |             msg="\n\texpected={}\n\tactual={}".format(expected_nearest, nearest))
21 |         torch.testing.assert_close(expected_t, float(t),
22 |             msg="\n\texpected={}\n\tactual={}".format(expected_t, t))
23 | 
24 |         # Test a point that is closest to the start of the segment.
25 |         point = torch.tensor([0.0, 0.0], dtype=torch.float32)
26 |         segment = torch.tensor([[1.0, 0.0], [1.0, 1.0]], dtype=torch.float32)
27 |         expected_nearest = torch.tensor([1.0, 0.0], dtype=torch.float32)
28 |         expected_t = 0.0
29 |         nearest, t = point_to_segment_nearest(point, segment[0], segment[1])
30 |         torch.testing.assert_close(expected_nearest, nearest,
31 |             msg="\n\texpected={}\n\tactual={}".format(expected_nearest, nearest))
32 |         torch.testing.assert_close(expected_t, float(t),
33 |             msg="\n\texpected={}\n\tactual={}".format(expected_t, t))
34 | 
35 |         # Test a point that is closest to the end of the segment.
36 |         point = torch.tensor([0.0, 1.0], dtype=torch.float32)
37 |         segment = torch.tensor([[1.0, 0.0], [1.0, 1.0]], dtype=torch.float32)
38 |         expected_nearest = torch.tensor([1.0, 1.0], dtype=torch.float32)
39 |         expected_t = 1.0
40 |         nearest, t = point_to_segment_nearest(point, segment[0], segment[1])
41 |         torch.testing.assert_close(expected_nearest, nearest,
42 |             msg="\n\texpected={}\n\tactual={}".format(expected_nearest, nearest))
43 |         torch.testing.assert_close(expected_t, float(t),
44 |             msg="\n\texpected={}\n\tactual={}".format(expected_t, t))
45 | 
46 |     def test_single_triangle_forward(self):
47 |         """
48 |         Test the forward rasterization pass by rasterizing a single triangle to a
49 |         small 10x10 image.
The image coverage should look like so if hard-rasterized: 50 | 51 | 0 0 0 0 0 0 0 0 0 H 52 | 0 0 0 0 0 0 0 0 H 1 53 | 0 0 0 0 0 0 0 H 1 1 54 | 0 0 0 0 0 0 H 1 1 1 55 | 0 0 0 0 0 H 1 1 1 1 56 | 0 0 0 0 H 1 1 1 1 1 57 | 0 0 0 H 1 1 1 1 1 1 58 | 0 0 H 1 1 1 1 1 1 1 59 | 0 H 1 1 1 1 1 1 1 1 60 | H 1 1 1 1 1 1 1 1 1 61 | 62 | Where 1 indicates full coverage, 0 is no coverage, and H is half-covered 63 | (for hard-rasterization, this can be either considered in or out). 64 | """ 65 | 66 | # in eye space: z=-1 for all vertices, znear=0.5, zfar=2.5 67 | clip_space_vertices = torch.tensor( 68 | [ 69 | [1.0, -1.0, 0.25, 1.0], 70 | [1.0, 1.0, 0.25, 1.0], 71 | [-1.0, -1.0, 0.25, 1.0], 72 | ], 73 | dtype=torch.float32 74 | ) 75 | triangles = torch.tensor([[0, 1, 2]], dtype=torch.int32) 76 | world_space_vertices = torch.tensor( 77 | [ 78 | [1.0, -1.0, 0.0], 79 | [1.0, 1.0, 0.0], 80 | [-1.0, -1.0, 0.0], 81 | ], 82 | dtype=torch.float32 83 | ) 84 | normals = torch.tensor( 85 | [ 86 | [0.0, 0.0, 1.0], 87 | [0.0, 0.0, 1.0], 88 | [0.0, 0.0, 1.0], 89 | ], 90 | dtype=torch.float32 91 | ) 92 | diffuse_colors = torch.tensor( 93 | [ 94 | [1.0, 0.0, 0.0], 95 | [1.0, 0.0, 0.0], 96 | [1.0, 0.0, 0.0], 97 | ], 98 | dtype=torch.float32 99 | ) 100 | # one light at effectively infinity 101 | light_positions = torch.tensor([[0.0, 0.0, 100000.0]], dtype=torch.float32) 102 | light_intensities = torch.tensor([1.0], dtype=torch.float32) 103 | image_width, image_height = 10, 10 104 | sigma_val = 1e-5 105 | gamma_val = 1e-4 106 | 107 | ############################################################## 108 | # Case 1: blur radius smaller than a single screen-space pixel 109 | ############################################################## 110 | blur_radius = 0.01 111 | output = rasterize_batch( 112 | clip_space_vertices, 113 | triangles, 114 | ### vertex attributes 115 | world_space_vertices, 116 | normals, 117 | diffuse_colors, 118 | ### lighting 119 | light_positions, 120 | light_intensities, 121 | ### 122 | image_width, 123 | image_height, 124 | sigma_val, 125 | gamma_val, 126 | blur_radius 127 | ) 128 | expected_red = torch.tensor([ 129 | [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.], 130 | [0., 0., 0., 0., 0., 0., 0., 0., 1., 1.], 131 | [0., 0., 0., 0., 0., 0., 0., 1., 1., 1.], 132 | [0., 0., 0., 0., 0., 0., 1., 1., 1., 1.], 133 | [0., 0., 0., 0., 0., 1., 1., 1., 1., 1.], 134 | [0., 0., 0., 0., 1., 1., 1., 1., 1., 1.], 135 | [0., 0., 0., 1., 1., 1., 1., 1., 1., 1.], 136 | [0., 0., 1., 1., 1., 1., 1., 1., 1., 1.], 137 | [0., 1., 1., 1., 1., 1., 1., 1., 1., 1.], 138 | [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], 139 | ], dtype=torch.float32) 140 | expected_green = torch.zeros_like(expected_red) 141 | expected_blue = torch.zeros_like(expected_red) 142 | expected_alpha = torch.tensor([ 143 | [0., 0., 0., 0., 0., 0., 0., 0., 0., .5], 144 | [0., 0., 0., 0., 0., 0., 0., 0., .5, 1.], 145 | [0., 0., 0., 0., 0., 0., 0., .5, 1., 1.], 146 | [0., 0., 0., 0., 0., 0., .5, 1., 1., 1.], 147 | [0., 0., 0., 0., 0., .5, 1., 1., 1., 1.], 148 | [0., 0., 0., 0., .5, 1., 1., 1., 1., 1.], 149 | [0., 0., 0., .5, 1., 1., 1., 1., 1., 1.], 150 | [0., 0., .5, 1., 1., 1., 1., 1., 1., 1.], 151 | [0., .5, 1., 1., 1., 1., 1., 1., 1., 1.], 152 | [.5, 1., 1., 1., 1., 1., 1., 1., 1., 1.], 153 | ], dtype=torch.float32) 154 | 155 | torch.testing.assert_close(output[..., 0], expected_red) 156 | torch.testing.assert_close(output[..., 1], expected_green) 157 | torch.testing.assert_close(output[..., 2], expected_blue) 158 | torch.testing.assert_close(output[..., 3], 
expected_alpha)
159 |         ##############################################################
160 |         # Case 2: blur radius spans a single screen-space pixel
161 |         ##############################################################
162 |         # Add a small epsilon to capture samples right on the edge.
163 |         blur_radius2 = 0.1 * np.sqrt(2.0) + 1e-6
164 |         # This will cause samples blur_radius2 away from a triangle to
165 |         # have a nonzero coverage (1e-3) by the triangle. This is needed
166 |         # for samples that lie exactly on the edge to work.
167 |         sigma_val2 = -blur_radius2**2 / torch.special.logit(torch.tensor(1e-3))
168 |         output2 = rasterize_batch(
169 |             clip_space_vertices,
170 |             triangles,
171 |             ### vertex attributes
172 |             world_space_vertices,
173 |             normals,
174 |             diffuse_colors,
175 |             ### lighting
176 |             light_positions,
177 |             light_intensities,
178 |             ###
179 |             image_width,
180 |             image_height,
181 |             sigma_val2,
182 |             gamma_val,
183 |             blur_radius2
184 |         )
185 |         expected_red2 = torch.tensor([
186 |             [0., 0., 0., 0., 0., 0., 0., 0., 1., 1.],
187 |             [0., 0., 0., 0., 0., 0., 0., 1., 1., 1.],
188 |             [0., 0., 0., 0., 0., 0., 1., 1., 1., 1.],
189 |             [0., 0., 0., 0., 0., 1., 1., 1., 1., 1.],
190 |             [0., 0., 0., 0., 1., 1., 1., 1., 1., 1.],
191 |             [0., 0., 0., 1., 1., 1., 1., 1., 1., 1.],
192 |             [0., 0., 1., 1., 1., 1., 1., 1., 1., 1.],
193 |             [0., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
194 |             [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
195 |             [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
196 |         ], dtype=torch.float32)
197 |         expected_green2 = torch.zeros_like(expected_red2)
198 |         expected_blue2 = torch.zeros_like(expected_red2)
199 |         expected_alpha2 = torch.tensor([
200 |             [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0010, 0.5000],
201 |             [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0010, 0.5000, 0.9693],
202 |             [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0010, 0.5000, 0.9990, 0.9693],
203 |             [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0010, 0.5000, 0.9990, 1.0000, 0.9693],
204 |             [0.0000, 0.0000, 0.0000, 0.0000, 0.0010, 0.5000, 0.9990, 1.0000, 1.0000, 0.9693],
205 |             [0.0000, 0.0000, 0.0000, 0.0010, 0.5000, 0.9990, 1.0000, 1.0000, 1.0000, 0.9693],
206 |             [0.0000, 0.0000, 0.0010, 0.5000, 0.9990, 1.0000, 1.0000, 1.0000, 1.0000, 0.9693],
207 |             [0.0000, 0.0010, 0.5000, 0.9990, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 0.9693],
208 |             [0.0010, 0.5000, 0.9990, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 0.9693],
209 |             [0.5000, 0.9693, 0.9693, 0.9693, 0.9693, 0.9693, 0.9693, 0.9693, 0.9693, 0.9693]
210 |         ], dtype=torch.float32)
211 | 
212 |         torch.testing.assert_close(output2[..., 0], expected_red2)
213 |         torch.testing.assert_close(output2[..., 1], expected_green2)
214 |         torch.testing.assert_close(output2[..., 2], expected_blue2)
215 |         torch.testing.assert_close(output2[..., 3], expected_alpha2, atol=1e-04, rtol=0)
216 | 
217 |     def test_optimize_single_triangle_translation(self):
218 |         """
219 |         Test optimizing a single triangle's xy-translation.
220 | 
221 |         The test proceeds by rasterizing a single triangle to a 10x10 image.
222 |         The starting triangle overlaps the target triangle; to reach the
223 |         target, it must be translated to the right by 0.25 NDC units, a
224 |         quarter of the triangle's width.
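        Optimization runs SGD on the translation parameter against the mean
        absolute pixel difference from the target image (see stepfn below).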
225 | """ 226 | translation_x = torch.tensor(0., requires_grad=True) 227 | target_translation_x = 0.25 228 | # in eye space: z=-1 for all vertices, znear=0.5, zfar=2.5 229 | clip_space_vertices = torch.tensor( 230 | [ 231 | [-0.5, 0.0, 0.25, 1.0], 232 | [0.5, 1.0, 0.25, 1.0], 233 | [-0.5, 1.0, 0.25, 1.0], 234 | ], 235 | dtype=torch.float32 236 | ) 237 | triangles = torch.tensor([[0, 1, 2]], dtype=torch.int32) 238 | world_space_vertices = torch.tensor( 239 | [ 240 | [-0.5, 0.0, 0.0], 241 | [0.5, 1.0, 0.0], 242 | [-0.5, 1.0, 0.0], 243 | ], 244 | dtype=torch.float32 245 | ) 246 | normals = torch.tensor( 247 | [ 248 | [0.0, 0.0, 1.0], 249 | [0.0, 0.0, 1.0], 250 | [0.0, 0.0, 1.0], 251 | ], 252 | dtype=torch.float32 253 | ) 254 | diffuse_colors = torch.tensor( 255 | [ 256 | [1.0, 0.0, 0.0], 257 | [1.0, 0.0, 0.0], 258 | [1.0, 0.0, 0.0], 259 | ], 260 | dtype=torch.float32 261 | ) 262 | # one light at effectively infinity 263 | light_positions = torch.tensor([[0.0, 0.0, 100000.0]], dtype=torch.float32) 264 | light_intensities = torch.tensor([1.0], dtype=torch.float32) 265 | image_width, image_height = 10, 10 266 | sigma_val = 1e-5 267 | gamma_val = 1e-1 268 | 269 | # rasterize target image 270 | target_output = rasterize_batch( 271 | clip_space_vertices + torch.tensor([target_translation_x, 0.0, 0.0, 0.0]), 272 | triangles, 273 | ### vertex attributes 274 | world_space_vertices + torch.tensor([target_translation_x, 0.0, 0.0]), 275 | normals, 276 | diffuse_colors, 277 | ### lighting 278 | light_positions, 279 | light_intensities, 280 | ### 281 | image_width, 282 | image_height, 283 | sigma_val, 284 | gamma_val, 285 | 0.01 # target image should not be blurred 286 | ) 287 | 288 | blur_radius = 0.0 289 | sigma_saturation_radius = 0.5 290 | sigma_val = -sigma_saturation_radius**2 / torch.special.logit(torch.tensor(1e-5)) 291 | def stepfn(): 292 | clip_space_translation = torch.zeros_like(clip_space_vertices) 293 | world_space_translation = torch.zeros_like(world_space_vertices) 294 | clip_space_translation[:, 0] = translation_x 295 | world_space_translation[:, 0] = translation_x 296 | 297 | output = rasterize_batch( 298 | clip_space_vertices + clip_space_translation, 299 | triangles, 300 | ### vertex attributes 301 | world_space_vertices + world_space_translation, 302 | normals, 303 | diffuse_colors, 304 | ### lighting 305 | light_positions, 306 | light_intensities, 307 | ### 308 | image_width, 309 | image_height, 310 | sigma_val, 311 | gamma_val, 312 | blur_radius 313 | ) 314 | 315 | loss = torch.mean(torch.abs(output - target_output)) 316 | loss.backward() 317 | return loss 318 | 319 | # optimization loop: rasterize then backwards until optimized 320 | optimizer = torch.optim.SGD([translation_x], 0.7, 0.1) 321 | for e in range(50): 322 | optimizer.zero_grad() 323 | optimizer.step(stepfn) 324 | 325 | pixel_width = 0.2 # 10x10 grid and NDC range from -1.0 to +1.0 326 | torch.testing.assert_close(float(translation_x), target_translation_x, atol=pixel_width/2, rtol=0.0) 327 | 328 | 329 | if __name__ == "__main__": 330 | unittest.main() --------------------------------------------------------------------------------