├── .github
│   └── workflows
│       └── ci.yml
├── .gitignore
├── README.md
├── requirements.txt
└── src
    ├── __init__.py
    ├── common
    │   ├── __init__.py
    │   ├── camera_utils.py
    │   ├── debug_utils.py
    │   ├── meshes.py
    │   ├── obj_utils.py
    │   └── shapes.py
    ├── examples
    │   ├── example1.png
    │   ├── example1.py
    │   ├── example1b.png
    │   ├── example1b.py
    │   ├── example4.mp4
    │   ├── example4.py
    │   ├── example4_target.png
    │   ├── example5.mp4
    │   ├── example5.py
    │   ├── example5b.mp4
    │   ├── example5b.py
    │   ├── example5b_target.png
    │   ├── example6.mp4
    │   ├── example6.py
    │   ├── example6_target.png
    │   ├── example6b.mp4
    │   ├── example6b.py
    │   ├── example6b_target.png
    │   ├── example7b.py
    │   ├── example7b_target1.png
    │   ├── example7b_target2.png
    │   ├── example7b_target3.png
    │   ├── example7b_target4.png
    │   └── teapot.obj
    ├── mesh_renderer
    │   ├── README.md
    │   ├── __init__.py
    │   ├── kernels
    │   │   ├── rasterize_triangles.cpp
    │   │   └── setup.py
    │   ├── mesh_renderer_test.py
    │   ├── rasterize.py
    │   ├── rasterize_triangles_ext.py
    │   ├── rasterize_triangles_python.py
    │   ├── rasterize_triangles_test.py
    │   ├── render.py
    │   ├── test_data
    │   │   ├── Barycentrics_Cube.png
    │   │   ├── Colored_Cube_0.png
    │   │   ├── Colored_Cube_1.png
    │   │   ├── External_Triangle.png
    │   │   ├── Gray_Cube_0.png
    │   │   ├── Gray_Cube_1.png
    │   │   ├── Inside_Box.png
    │   │   ├── Perspective_Corrected_Triangle.png
    │   │   ├── Simple_Tetrahedron.png
    │   │   ├── Simple_Triangle.png
    │   │   ├── Unlit_Cube_0.png
    │   │   └── Unlit_Cube_1.png
    │   └── test_utils.py
    └── soft_mesh_renderer
        ├── README.md
        ├── __init__.py
        ├── quadtree.py
        ├── rasterize.py
        ├── render.py
        └── test_rasterize.py

/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python
2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
3 | 
4 | name: Python application
5 | 
6 | on: [push, pull_request]
7 | 
8 | permissions:
9 |   contents: read
10 | 
11 | jobs:
12 |   build:
13 | 
14 |     runs-on: ubuntu-latest
15 | 
16 |     steps:
17 |     - uses: actions/checkout@v3
18 |     - name: Set up Python 3.10
19 |       uses: actions/setup-python@v3
20 |       with:
21 |         python-version: "3.10"
22 |     - name: Install dependencies
23 |       run: |
24 |         python -m pip install --upgrade pip
25 |         pip install flake8 pytest
26 |         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
27 |     - name: Test with pytest
28 |       run: |
29 |         pytest
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | 
3 | # Byte-compiled / optimized / DLL files
4 | __pycache__/
5 | *.py[cod]
6 | *$py.class
7 | 
8 | # C extensions
9 | *.so
10 | 
11 | # Distribution / packaging
12 | .Python
13 | build/
14 | develop-eggs/
15 | dist/
16 | downloads/
17 | eggs/
18 | .eggs/
19 | lib/
20 | lib64/
21 | parts/
22 | sdist/
23 | var/
24 | wheels/
25 | pip-wheel-metadata/
26 | share/python-wheels/
27 | *.egg-info/
28 | .installed.cfg
29 | *.egg
30 | MANIFEST
31 | 
32 | # PyInstaller
33 | # Usually these files are written by a python script from a template
34 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 92 | # install all needed dependencies. 93 | #Pipfile.lock 94 | 95 | # celery beat schedule file 96 | celerybeat-schedule 97 | 98 | # SageMath parsed files 99 | *.sage.py 100 | 101 | # Environments 102 | .env 103 | .venv 104 | env/ 105 | venv/ 106 | ENV/ 107 | env.bak/ 108 | venv.bak/ 109 | 110 | # Spyder project settings 111 | .spyderproject 112 | .spyproject 113 | 114 | # Rope project settings 115 | .ropeproject 116 | 117 | # mkdocs documentation 118 | /site 119 | 120 | # mypy 121 | .mypy_cache/ 122 | .dmypy.json 123 | dmypy.json 124 | 125 | # Pyre type checker 126 | .pyre/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | This repository contains implementations of two differentiable 3D mesh renderers using PyTorch: 4 | - `mesh_renderer`: A port of Google's [tf_mesh_renderer](https://github.com/google/tf_mesh_renderer) from Tensorflow to PyTorch. Based on the barycentric formulation from [Genova et al. 2018 "Unsupervised training for 3d morphable model regression."](https://openaccess.thecvf.com/content_cvpr_2018/papers/Genova_Unsupervised_Training_for_CVPR_2018_paper.pdf) 5 | - `soft_mesh_renderer`: An alternate implementation of [SoftRas](https://github.com/ShichenLiu/SoftRas) that I built for my own learning. Based on the probabilistic rasterization formulation by [Liu et al. 2019 "Soft Rasterizer: A Differentiable Renderer for Image-based 3D Reasoning"](https://arxiv.org/abs/1904.01786). 6 | 7 | # Setup 8 | 9 | 1. Create a virtual environment with `python3 -m venv env` 10 | 2. Activate it with `source env/bin/activate` 11 | 3. Install external dependencies with `pip install -r requirements.txt` 12 | 13 | Some additional setup is required to use the optimized kernel for the barycentric renderer. See [docs](https://github.com/andrewkchan/pytorch_mesh_renderer/blob/master/src/mesh_renderer/README.md) for more. 14 | 15 | # Testing 16 | 17 | Tests are included for both renderers. 18 | 19 | - mesh_renderer: See [mesh_renderer docs](https://github.com/andrewkchan/pytorch_mesh_renderer/blob/master/src/mesh_renderer/README.md) for how to run these tests. 20 | - soft_mesh_renderer: See [soft_mesh_renderer docs](https://github.com/andrewkchan/pytorch_mesh_renderer/blob/master/src/soft_mesh_renderer/README.md) for how to run these tests. 
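
# Quick example

The snippet below is a minimal sketch of calling the barycentric renderer, condensed from `src/examples/example1.py` (the module and argument names come from that example; run from the repository root, e.g. `python -m src.examples.example1`, so the package imports resolve):

```python
import numpy as np
import torch
from skimage import io

from src import mesh_renderer as mr
from src.common import obj_utils

# Load a mesh and add a batch dimension to the per-vertex tensors.
# Triangles are shared across the batch and stay unbatched.
vertices, triangles, normals = obj_utils.load_obj("src/examples/teapot.obj")
vertices, normals = vertices[None, :, :], normals[None, :, :]

# Camera, white per-vertex diffuse colors, and a single white point light.
eye = torch.tensor([[0.0, 0.0, 3.0]])
center = torch.tensor([[0.0, 0.0, 0.0]])
world_up = torch.tensor([[0.0, 1.0, 0.0]])
diffuse_colors = torch.ones_like(vertices)
light_positions = torch.tensor([[[0.0, 3.0, 0.0]]])
light_intensities = torch.ones([1, 1, 3])

# Produces a [batch_size, height, width, 4] RGBA image batch.
image = mr.render(vertices, triangles, normals, diffuse_colors,
                  eye, center, world_up, light_positions, light_intensities,
                  640, 480)
io.imsave("teapot.png",
          (np.clip(image[0].numpy(), 0., 1.) * 255.0).astype(np.uint8))
```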
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | filelock==3.13.1 2 | fsspec==2023.12.2 3 | imageio==2.33.1 4 | imageio-ffmpeg==0.4.9 5 | Jinja2==3.1.2 6 | lazy_loader==0.3 7 | MarkupSafe==2.1.3 8 | mpmath==1.3.0 9 | networkx==3.2.1 10 | numpy==1.26.2 11 | packaging==23.2 12 | Pillow==10.1.0 13 | scikit-image==0.22.0 14 | scipy==1.11.4 15 | sympy==1.12 16 | tifffile==2023.12.9 17 | torch==2.1.2 18 | typing_extensions==4.9.0 19 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/__init__.py -------------------------------------------------------------------------------- /src/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/common/__init__.py -------------------------------------------------------------------------------- /src/common/camera_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import math 6 | import numpy as np 7 | import torch 8 | 9 | 10 | def euler_matrices(angles): 11 | """Compute a XYZ Tait-Bryan (improper Euler angle) rotation. 12 | 13 | Return 4x4 matrices for convenient multiplication with other transformations. 14 | 15 | Args: 16 | angles: a [batch_size, 3] tensor containing X, Y, and Z angles in radians. 17 | 18 | Returns: 19 | a [batch_size, 4, 4] tensor of matrices. 20 | """ 21 | s = torch.sin(angles) 22 | c = torch.cos(angles) 23 | # Rename variables for readability in the matrix definition below. 24 | c0, c1, c2 = (c[:, 0], c[:, 1], c[:, 2]) 25 | s0, s1, s2 = (s[:, 0], s[:, 1], s[:, 2]) 26 | 27 | zeros = torch.zeros_like(s[:, 0]) 28 | ones = torch.ones_like(s[:, 0]) 29 | 30 | flattened = torch.cat( 31 | [ 32 | c2*c1, c2*s1*s0 - c0*s2, s2*s0 + c2*c0*s1, zeros, 33 | c1*s2, c2*c0 + s2*s1*s0, c0*s2*s1 - c2*s0, zeros, 34 | -s1, c1*s0, c1*c0, zeros, 35 | zeros, zeros, zeros, ones 36 | ], 37 | dim=0) 38 | reshaped = torch.reshape(flattened, [4, 4, -1]) 39 | # transpose dims [0, 1, 2] -> [2, 0, 1] 40 | reshaped = torch.transpose(reshaped, 0, 1) 41 | reshaped = torch.transpose(reshaped, 0, 2) 42 | return reshaped 43 | 44 | 45 | def look_at(eye, center, world_up): 46 | """Compute camera viewing matrices. 47 | 48 | Functionality mimes gluLookAt (external/GL/glu/include/GLU/glu.h). 49 | 50 | Args: 51 | eye: 2D float32 tensor with shape [batch_size, 3] containing the XYZ 52 | world space position of the camera. 53 | center: 2D float32 tensor with shape [batch_size, 3] containing a 54 | position along the center of the camera's gaze line. 55 | world_up: 2D float32 tensor with shape [batch_size, 3] specifying the 56 | world's up direction; the output camera will have no tilt with 57 | respect to this direction. 58 | 59 | Returns: 60 | A [batch_size, 4, 4] float tensor containing a right-handed camera 61 | extrinsics matrix that maps points from world space to points in eye 62 | space. 
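
    Example (illustrative): a camera at (0, 0, 3) looking at the origin
    with +Y up yields the identity rotation and a translation of
    (0, 0, -3):

        eye = torch.tensor([[0.0, 0.0, 3.0]])
        center = torch.tensor([[0.0, 0.0, 0.0]])
        world_up = torch.tensor([[0.0, 1.0, 0.0]])
        look_at(eye, center, world_up)[0]
        # tensor([[ 1.,  0.,  0.,  0.],
        #         [ 0.,  1.,  0.,  0.],
        #         [ 0.,  0.,  1., -3.],
        #         [ 0.,  0.,  0.,  1.]])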
63 | """ 64 | batch_size = center.shape[0] 65 | vector_degeneracy_cutoff = 1e-6 66 | forward = center - eye 67 | forward_norm = torch.linalg.norm(forward, ord=None, dim=1, keepdim=True).clone() 68 | np.testing.assert_array_less(vector_degeneracy_cutoff, forward_norm, 69 | err_msg="Camera matrix is degenerate because eye and center are close.") 70 | forward = forward/forward_norm 71 | 72 | to_side = torch.cross(forward, world_up, dim=-1) 73 | to_side_norm = torch.linalg.norm(to_side, ord=None, dim=1, keepdim=True).clone() 74 | np.testing.assert_array_less(vector_degeneracy_cutoff, to_side_norm, 75 | err_msg="Camera matrix is degenerate because up and gaze are too close " 76 | "or because up is degenerate.") 77 | to_side = to_side/to_side_norm 78 | cam_up = torch.cross(to_side, forward, dim=-1) 79 | 80 | w_column = torch.tensor( 81 | batch_size * [[0., 0., 0., 1.]], dtype=torch.float32) # [batch_size, 4] 82 | w_column = torch.reshape(w_column, [batch_size, 4, 1]) 83 | view_rotation = torch.stack( 84 | [to_side, cam_up, -forward, 85 | torch.zeros_like(to_side, dtype=torch.float32)], 86 | dim=1) # [batch_size, 4, 3] matrix 87 | view_rotation = torch.cat([view_rotation, w_column], 88 | dim=2) # [batch_size, 4, 4] 89 | 90 | identity_batch = torch.unsqueeze(torch.eye(3), 0).repeat([batch_size, 1, 1]) 91 | view_translation = torch.cat([identity_batch, torch.unsqueeze(-eye, 2)], 2) 92 | view_translation = torch.cat( 93 | [view_translation, 94 | torch.reshape(w_column, [batch_size, 1, 4])], 1) 95 | camera_matrices = torch.matmul(view_rotation, view_translation) 96 | return camera_matrices 97 | 98 | 99 | def perspective(aspect_ratio, fov_y, near_clip, far_clip): 100 | """Computes perspective transformation matrices. 101 | 102 | Functionality mimes gluPerspective (external/GL/glu/include/GLU/glu.h). 103 | See: 104 | https://unspecified.wordpress.com/2012/06/21/calculating-the-gluperspective-matrix-and-other-opengl-matrix-maths/ 105 | 106 | Args: 107 | aspect_ratio: float value specifying the image aspect ratio 108 | (width/height). 109 | fov_y: 1D float32 Tensor with shape [batch_size] specifying output 110 | vertical field of views in degrees. 111 | near_clip: 1D float32 Tensor with shape [batch_size] specifying near 112 | clipping plane distance. 113 | far_clip: 1D float32 Tensor with shape [batch_size] specifying far 114 | clipping plane distance. 115 | 116 | Returns: 117 | A [batch_size, 4, 4] float tensor that maps from right-handed points in 118 | eye space to left-handed points in clip space. 119 | """ 120 | # The multiplication of fov_y by pi/360.0 simultaneously converts to radians 121 | # and adds the half-angle factor of .5. 
122 | focal_lengths_y = 1.0 / torch.tan(fov_y * (math.pi / 360.0)) 123 | depth_range = far_clip - near_clip 124 | p_22 = -(far_clip + near_clip) / depth_range 125 | p_23 = -2.0 * (far_clip * near_clip / depth_range) 126 | 127 | zeros = torch.zeros_like(p_23, dtype=torch.float32) 128 | perspective_transform = torch.cat( 129 | [ 130 | focal_lengths_y / aspect_ratio, zeros, zeros, zeros, 131 | zeros, focal_lengths_y, zeros, zeros, 132 | zeros, zeros, p_22, p_23, 133 | zeros, zeros, -torch.ones_like(p_23, dtype=torch.float32), zeros 134 | ], dim=0) 135 | perspective_transform = torch.reshape(perspective_transform, [4, 4, -1]) 136 | # transpose dimensions [0, 1, 2] -> [2, 0, 1] 137 | perspective_transform = torch.transpose(perspective_transform, 0, 1) 138 | perspective_transform = torch.transpose(perspective_transform, 0, 2) 139 | return perspective_transform 140 | 141 | 142 | def transform_homogeneous(matrices, vertices): 143 | """Applies batched 4x4 homogeneous matrix transforms to 3D vertices. 144 | 145 | The vertices are input and output as row-major, but are interpreted as 146 | column vectors multiplied on the right-hand side of the matrices. More 147 | explicitly, this function computes (MV^T)^T. 148 | Vertices are assumed to be xyz, and are extended to xyzw with w=1. 149 | 150 | Args: 151 | matrices: a [batch_size, 4, 4] tensor of matrices. 152 | vertices: a [batch_size, N, 3] tensor of xyz vertices. 153 | 154 | Returns: 155 | a [batch_size, N , 4] tensor of xyzw vertices. 156 | 157 | Raises: 158 | ValueError: if matrices or vertices have the wrong number of dimensions. 159 | """ 160 | if len(matrices.shape) != 3: 161 | raise ValueError( 162 | "matrices must have 3 dimensions (missing batch dimension?)") 163 | if len(vertices.shape) != 3: 164 | raise ValueError( 165 | "vertices must have 3 dimensions (missing batch dimension?)") 166 | homogeneous_coord = torch.ones( 167 | [vertices.shape[0], vertices.shape[1], 1], dtype=torch.float32) 168 | vertices_homogeneous = torch.cat([vertices, homogeneous_coord], 2) 169 | 170 | return torch.matmul(vertices_homogeneous, matrices.transpose(1, 2)) 171 | -------------------------------------------------------------------------------- /src/common/debug_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def debug_tensor(tensor, msg=""): 4 | torch.set_printoptions(profile="full", linewidth=200) 5 | print("[debug tensor] {}".format(msg)) 6 | print(tensor) 7 | torch.set_printoptions(profile="default", linewidth=80) 8 | 9 | def check_isnan_isinf(tensor, msg=""): 10 | if torch.isnan(tensor).any() or torch.isinf(tensor).any(): 11 | raise ValueError(msg) -------------------------------------------------------------------------------- /src/common/meshes.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def compute_vertex_normals(vertices, triangles): 4 | """ 5 | Computes vertex normals for a triangle mesh by first computing 6 | face normals, then averaging the normals on incident vertices. 7 | The resulting vectors are normalized. 8 | 9 | Args: 10 | vertices: 3D float32 tensor with shape [batch_size, vertex_count, 3]. Each 11 | triplet is an xyz position in world space. 12 | triangles: 2D int32 tensor with shape [triangle_count, 3]. 13 | 14 | Returns: 15 | - A tensor with shape [batch_size, vertex_count, 3] providing per-vertex normal 16 | vectors. 
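
    Note: the accumulated cross products are unnormalized, and their magnitudes
    equal twice the triangle areas, so the per-vertex average is implicitly
    area-weighted before the final normalization.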
17 | """ 18 | batch_size = vertices.shape[0] 19 | normals = torch.zeros_like(vertices) 20 | for b in range(batch_size): 21 | # vertices_faces[i][j] gives the vertex corresponding to triangles[i][j] 22 | vertices_faces = vertices[b, triangles.long(), :] # [vertex_count, 3, 3] 23 | normals[b].index_add_(0, triangles[:, 0].long(), 24 | torch.cross(vertices_faces[:, 1] - vertices_faces[:, 0], 25 | vertices_faces[:, 2] - vertices_faces[:, 0]) 26 | ) 27 | normals[b].index_add_(0, triangles[:, 1].long(), 28 | torch.cross(vertices_faces[:, 2] - vertices_faces[:, 1], 29 | vertices_faces[:, 0] - vertices_faces[:, 1]) 30 | ) 31 | normals[b].index_add_(0, triangles[:, 2].long(), 32 | torch.cross(vertices_faces[:, 0] - vertices_faces[:, 2], 33 | vertices_faces[:, 1] - vertices_faces[:, 2]) 34 | ) 35 | normals = torch.nn.functional.normalize(normals, eps=1e-6, p=2, dim=-1) 36 | return normals -------------------------------------------------------------------------------- /src/common/obj_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | 4 | from . import meshes 5 | 6 | def load_obj(filename, normalize=True): 7 | """ 8 | Load Wavefront .obj file. 9 | This function only supports vertices (v x x x), normals (vn x x x), 10 | and faces (f x x x). Per-face-vertex normals are not supported; 11 | they will be averaged out so that each vertex gets exactly 1 normal. 12 | 13 | Returns: 14 | - vertices, faces, normals: Tuple of tensors with shapes 15 | ([vertex_count, 3], [triangle_count, 3], [vertex_count, 3]) 16 | and types (float32, int32, float32). 17 | """ 18 | 19 | vertices = [] 20 | all_normals = [] 21 | vertex_id_to_normals = {} 22 | faces = [] 23 | with open(filename) as f: 24 | lines = f.readlines() 25 | 26 | for line in lines: 27 | parts = line.split() 28 | if len(parts) == 0: 29 | continue 30 | if parts[0] == 'v': 31 | vertices.append([float(v) for v in parts[1:4]]) 32 | elif parts[0] == 'vn': 33 | all_normals.append([float(v) for v in parts[1:4]]) 34 | elif parts[0] == 'f': 35 | face_vertices = line.split()[1:] 36 | if len(face_vertices) > 3: 37 | print("warning: encountered a face with more than 3 vertices," + 38 | "extra vertices will be skipped") 39 | faces.append([int(face_vertex.split('/')[0]) for face_vertex in face_vertices[:3]]) 40 | if len(face_vertices[0].split('/')) > 2: 41 | # handle face-vertex normal spec: `f v1//vn1 v2//vn2 v3//vn3` 42 | for face_vertex in face_vertices[:3]: 43 | parts = face_vertex.split('/') 44 | vertex_id = int(parts[0]) - 1 45 | normal_id = int(parts[2]) - 1 46 | if vertex_id not in vertex_id_to_normals: 47 | vertex_id_to_normals[vertex_id] = [] 48 | vertex_id_to_normals[vertex_id].append(normal_id) 49 | 50 | vertices = torch.tensor(vertices, dtype=torch.float32) 51 | faces = torch.tensor(faces, dtype=torch.int32) - 1 52 | all_normals = torch.tensor(all_normals, dtype=torch.float32) 53 | normals = torch.zeros_like(vertices) 54 | 55 | if len(vertex_id_to_normals) == 0: 56 | normals = meshes.compute_vertex_normals( 57 | vertices[None, :, :], faces)[0] 58 | else: 59 | # average all face-vertex normals to a single normal vector per vertex 60 | for i in range(len(vertices)): 61 | if i not in vertex_id_to_normals: 62 | normals[i] = torch.ones(3) 63 | continue 64 | n = len(vertex_id_to_normals[i]) 65 | for j in vertex_id_to_normals[i]: 66 | normals[i] += all_normals[j] / n 67 | normals = torch.nn.functional.normalize(normals, p=2.0, dim=1) 68 | 69 | if normalize: 70 | # normalize into a unit cube 
centered around zero 71 | vertices -= vertices.min(0)[0][None, :] 72 | vertices /= torch.abs(vertices).max() 73 | vertices *= 2 74 | vertices -= vertices.max(0)[0][None, :] / 2 75 | 76 | return vertices, faces, normals 77 | 78 | def save_obj(filename, vertices, faces, normals=None): 79 | """ 80 | Save mesh to Wavefront .obj file. 81 | This function only supports vertices (v x x x), normals (vn x x x), 82 | and faces (f x x x). Per-face-vertex normals are not supported; 83 | normals must be the same shape as vertices and are assumed to be 1-1 to 84 | vertices. 85 | """ 86 | if len(vertices.shape) != 2 or vertices.shape[1] != 3: 87 | raise ValueError("vertices must have shape [vertex_count, 3]") 88 | if len(faces.shape) != 2 or faces.shape[1] != 3: 89 | raise ValueError("faces must have shape [triangle_count, 3]") 90 | if normals is not None: 91 | if len(normals.shape) != 2 or normals.shape[1] != 3: 92 | raise ValueError("normals must have shape [vertex_count, 3]") 93 | with open(filename, "w") as f: 94 | for vertex in vertices: 95 | f.write("v {} {} {}\n".format(vertex[0], vertex[1], vertex[2])) 96 | for face in faces: 97 | if normals is not None: 98 | f.write("f {}//{} {}//{} {}//{}\n".format( 99 | face[0] + 1, face[0] + 1, 100 | face[1] + 1, face[1] + 1, 101 | face[2] + 1, face[2] + 1 102 | )) 103 | else: 104 | f.write("f {} {} {}\n".format( 105 | face[0] + 1, 106 | face[1] + 1, 107 | face[2] + 1, 108 | )) 109 | if normals is not None: 110 | for normal in normals: 111 | f.write("vn {} {} {}\n".format(normal[0], normal[1], normal[2])) -------------------------------------------------------------------------------- /src/common/shapes.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | def sphere(radius, resolution=25): 5 | """ 6 | Creates a triangle mesh representing a sphere with the given radius. 7 | The mesh will be centered on the origin. 8 | 9 | Returns: A tuple (vertices, triangles, normals) 10 | - vertices: Float tensor of shape [num_vertices, 3] giving vertices in XYZ world space. 11 | - triangles: Int32 tensor of shape [num_triangles, 3] giving vertex IDs of triangles. 12 | The vertex IDs are ordered such that they wind CCW with respect to a viewer looking 13 | at the outside of the sphere. 14 | - normals: Float tensor of shape [num_vertices, 3] giving vertex normals in XYZ world space. 15 | The vectors are normalized. 16 | """ 17 | # We divide the sphere in K uniform longitude (phi) intervals. 18 | # Each longitude line starts and ends at the vertical poles of the sphere, 19 | # which are special vertices. Within each line, we will insert K vertices 20 | # between the poles by uniformly splitting latitude (theta). 21 | # 22 | # Thus, within the latitude lines not including the poles, we have equatorial 23 | # strips which are also uniformly split by the longitude lines. Each adjacent 24 | # pair of (theta, theta + theta_step) defines an equatorial strip, then when intersected 25 | # with an adjacent pair of phi, defines a quad on the surface of the sphere with 26 | # top-left corner at (theta, phi) and bottom-right corner at (theta + theta_step, 27 | # phi + phi_step). These quads are further split into 2 triangles each. 28 | # 29 | # The poles then connect to the adjacent latitude lines via the longitude lines. 30 | # Each pair of adjacent longitude lines (phi) forms a triangle. 
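    #
    # For example, resolution K = 3 gives K*K + 2 = 11 vertices and
    # 2*K*(K-1) + 2*K = 18 triangles: 12 from the equatorial quads plus
    # 6 connecting the poles.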
31 |     K = resolution
32 |     phi_step = 2.*np.pi/K
33 |     theta_step = np.pi/(K+1)
34 |     num_vertices = K**2 + 2
35 |     num_triangles = (2 * (K - 1)*K) + 2 * K
36 |     vertices = torch.zeros([num_vertices, 3], dtype=torch.float32)
37 |     triangles = torch.zeros([num_triangles, 3], dtype=torch.int32)
38 |     i = 0
39 |     # Vertex ids are grouped by latitude line:
40 |     # 0..K-1 are theta == 1*theta_step
41 |     # K..2K-1 are theta == 2*theta_step
42 |     # ...
43 |     # (K-1)*K..K*K-1 are theta == K*theta_step
44 |     for theta in np.linspace(theta_step, np.pi - theta_step, K, endpoint=True):
45 |         for phi in np.linspace(0., 2.*np.pi, K, endpoint=False):
46 |             vertices[i] = radius * torch.tensor([
47 |                 np.sin(theta) * np.sin(phi),
48 |                 np.cos(theta),
49 |                 np.sin(theta) * np.cos(phi),
50 |             ])
51 |             i += 1
52 |     # Last 2 vertex ids are the poles
53 |     vertices[num_vertices - 2] = torch.tensor([0., 1., 0.])
54 |     vertices[num_vertices - 1] = torch.tensor([0., -1., 0.])
55 | 
56 |     triangle_id = 0
57 |     for i in range(K-1):
58 |         for j in range(K):
59 |             top_left = i * K + j
60 |             top_right = i * K + (j + 1) % K  # wrap around the phi == 2*pi seam
61 |             bottom_left = (i + 1) * K + j
62 |             bottom_right = (i + 1) * K + (j + 1) % K
63 |             triangles[triangle_id] = torch.tensor([top_left, bottom_left, top_right])
64 |             triangles[triangle_id + 1] = torch.tensor([top_right, bottom_left, bottom_right])
65 |             triangle_id += 2
66 |     # connect top pole to topmost latitude line
67 |     for i in range(K):
68 |         left = i
69 |         right = (i + 1) % K  # wrap around the phi == 2*pi seam
70 |         top = num_vertices - 2
71 |         triangles[triangle_id] = torch.tensor([top, left, right])
72 |         triangle_id += 1
73 |     # connect bottom pole to bottommost latitude line
74 |     for i in range(K):
75 |         left = (K-1)*K + i
76 |         right = (K-1)*K + (i + 1) % K
77 |         bottom = num_vertices - 1
78 |         triangles[triangle_id] = torch.tensor([bottom, right, left])
79 |         triangle_id += 1
80 |     normals = torch.nn.functional.normalize(vertices, p=2.0, dim=-1)
81 |     return vertices, triangles, normals
82 | 
83 | def cube(size):
84 |     """
85 |     Creates a triangle mesh representing a cube with the given side length.
86 |     The mesh will be centered on the origin.
87 | 
88 |     Returns: A tuple (vertices, triangles, normals)
89 |     - vertices: Float tensor of shape [num_vertices, 3] giving vertices in XYZ world space.
90 |     - triangles: Int32 tensor of shape [num_triangles, 3] giving vertex IDs of triangles.
91 |       The vertex IDs are ordered such that they wind CCW with respect to a viewer looking
92 |       at the outside of the cube.
93 |     - normals: Float tensor of shape [num_vertices, 3] giving vertex normals in XYZ world space.
94 |       The vectors are normalized. Note that face-vertex normals are not supported and so
95 |       the vertex normals will be the average of the normals of the incident faces.
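
    For the cube below, this means each vertex normal points along its corner
    diagonal, i.e. (+/-1, +/-1, +/-1)/sqrt(3), so the cube is shaded smoothly
    rather than with flat faces.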
96 | """ 97 | vertices = 0.5 * size * torch.tensor( 98 | [[-1, -1, 1], [-1, -1, -1], [-1, 1, -1], [-1, 1, 1], [1, -1, 1], 99 | [1, -1, -1], [1, 1, -1], [1, 1, 1]], 100 | dtype=torch.float32) 101 | normals = torch.nn.functional.normalize(vertices, p=2.0, dim=-1) 102 | triangles = torch.tensor( 103 | [ 104 | [2, 1, 0], 105 | [0, 3, 2], 106 | [6, 2, 3], 107 | [3, 7, 6], 108 | [5, 6, 7], 109 | [7, 4, 5], 110 | [1, 5, 4], 111 | [4, 0, 1], 112 | [2, 6, 5], 113 | [5, 1, 2], 114 | [0, 4, 7], 115 | [7, 3, 0] 116 | ], 117 | dtype=torch.int32) 118 | return vertices, triangles, normals -------------------------------------------------------------------------------- /src/examples/example1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/examples/example1.png -------------------------------------------------------------------------------- /src/examples/example1.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example 1: Rendering a teapot from arbitrary angle. 3 | """ 4 | 5 | import os 6 | import argparse 7 | 8 | import torch 9 | import numpy as np 10 | from skimage import io 11 | 12 | from .. import mesh_renderer as mr 13 | from ..common import obj_utils 14 | 15 | current_dir = os.path.dirname(os.path.realpath(__file__)) 16 | data_dir = os.path.join(current_dir, '.') 17 | 18 | if __name__ == "__main__": 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument('-i', '--filename_input', type=str, default=os.path.join(data_dir, 'teapot.obj')) 21 | parser.add_argument('-o', '--filename_output', type=str, default=os.path.join(data_dir, 'example1.png')) 22 | args = parser.parse_args() 23 | 24 | # load obj file 25 | vertices, triangles, normals = obj_utils.load_obj(args.filename_input) 26 | vertices = vertices[None,:,:] # [num_vertices, 3] -> [batch_size=1, num_vertices, 3] 27 | # TODO why are triangles not batched? 
28 | normals = normals[None,:,:] # [num_vertices, 3] -> [batch_size=1, num_vertices, 3] 29 | 30 | # camera position: 31 | eye = torch.tensor([[0.0, 0.0, 3.0]], dtype=torch.float32) 32 | center = torch.tensor([[0.0, 0.0, 0.0]], dtype=torch.float32) 33 | world_up = torch.tensor([[0.0, 1.0, 0.0]], dtype=torch.float32) 34 | 35 | # create a diffuse colors tensor coloring all vertices white 36 | vertex_diffuse_colors = torch.ones_like(vertices, dtype=torch.float32) 37 | 38 | light_positions = torch.tensor([[[0.0, 3.0, 0.0]]], dtype=torch.float32) 39 | light_intensities = torch.ones([1, 1, 3], dtype=torch.float32) 40 | 41 | image_width = 640 42 | image_height = 480 43 | 44 | render = mr.render( 45 | vertices, triangles, normals, 46 | vertex_diffuse_colors, eye, center, world_up, light_positions, 47 | light_intensities, image_width, image_height) 48 | render = torch.reshape(render, [image_height, image_width, 4]) 49 | result_image = render.numpy() 50 | result_image = np.clip(result_image, 0., 1.).copy(order="C") 51 | 52 | io.imsave(args.filename_output, (result_image * 255.0).astype(np.uint8)) 53 | -------------------------------------------------------------------------------- /src/examples/example1b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/examples/example1b.png -------------------------------------------------------------------------------- /src/examples/example1b.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example 1b: Rendering a teapot from arbitrary angle with the soft rasterizer. 3 | """ 4 | 5 | import os 6 | import argparse 7 | 8 | import torch 9 | import numpy as np 10 | from skimage import io 11 | 12 | from .. import soft_mesh_renderer as smr 13 | from ..common import obj_utils 14 | 15 | current_dir = os.path.dirname(os.path.realpath(__file__)) 16 | data_dir = os.path.join(current_dir, '.') 17 | 18 | if __name__ == "__main__": 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument('-i', '--filename_input', type=str, default=os.path.join(data_dir, 'teapot.obj')) 21 | parser.add_argument('-o', '--filename_output', type=str, default=os.path.join(data_dir, 'example1b.png')) 22 | args = parser.parse_args() 23 | 24 | # load obj file 25 | vertices, triangles, _ = obj_utils.load_obj(args.filename_input) 26 | vertices = vertices[None,:,:] # [num_vertices, 3] -> [batch_size=1, num_vertices, 3] 27 | # TODO why are triangles not batched? 28 | 29 | # camera position: 30 | eye = torch.tensor([[0.0, 0.0, 3.0]], dtype=torch.float32) 31 | center = torch.tensor([[0.0, 0.0, 0.0]], dtype=torch.float32) 32 | world_up = torch.tensor([[0.0, 1.0, 0.0]], dtype=torch.float32) 33 | 34 | # create a diffuse colors tensor coloring all vertices white 35 | vertex_diffuse_colors = torch.ones_like(vertices, dtype=torch.float32) 36 | 37 | light_positions = torch.tensor([[[0.0, 3.0, 0.0]]], dtype=torch.float32) 38 | light_intensities = torch.ones([1, 1], dtype=torch.float32) 39 | 40 | image_width = 100 41 | image_height = 100 42 | 43 | render = smr.render( 44 | vertices, 45 | triangles, 46 | vertex_diffuse_colors, 47 | eye, 48 | center, 49 | world_up, 50 | light_positions, 51 | light_intensities, 52 | image_width, 53 | image_height 54 | ) 55 | render = torch.reshape(render, [image_height, image_width, 4]) 56 | # Binarize the alpha channel to 0 or 1. 
In the raw output of the soft renderer, 57 | # it represents the probability that a triangle occupies the pixel. This will be 58 | # less than 1.0 for any pixel which is not entirely covered by a triangle, even if 59 | # the pixel is technically completely covered when considering all triangles. If we 60 | # don't binarize the value, we will get seams in the output along triangle edges. 61 | render[..., 3] = 1.0 * (render[..., 3] > 0.0) 62 | result_image = render.numpy() 63 | result_image = np.clip(result_image, 0., 1.).copy(order="C") 64 | 65 | io.imsave(args.filename_output, (result_image * 255.0).astype(np.uint8)) 66 | -------------------------------------------------------------------------------- /src/examples/example4.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/examples/example4.mp4 -------------------------------------------------------------------------------- /src/examples/example4.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example 4: Optimizing camera angles to reconstruct a teapot render. 3 | 4 | This example doesn't converge with the barycentric-based differentiable renderer. 5 | """ 6 | 7 | import os 8 | import argparse 9 | 10 | import torch 11 | import numpy as np 12 | from skimage import io 13 | import imageio 14 | import matplotlib.pyplot as plt 15 | 16 | from .. import mesh_renderer as mr 17 | from ..common import obj_utils 18 | from ..common import camera_utils 19 | 20 | current_dir = os.path.dirname(os.path.realpath(__file__)) 21 | data_dir = os.path.join(current_dir, '.') 22 | 23 | if __name__ == "__main__": 24 | parser = argparse.ArgumentParser() 25 | parser.add_argument('-i', '--filename_input', type=str, default=os.path.join(data_dir, 'teapot.obj')) 26 | parser.add_argument('-t', '--filename_target', type=str, default=os.path.join(data_dir, 'example4_target.png')) 27 | parser.add_argument('-o', '--filename_output', type=str, default=os.path.join(data_dir, 'example4.mp4')) 28 | args = parser.parse_args() 29 | 30 | # load obj file 31 | vertices, triangles, normals = obj_utils.load_obj(args.filename_input) 32 | vertices = vertices[None,:,:] # [num_vertices, 3] -> [batch_size=1, num_vertices, 3] 33 | # TODO why are triangles not batched? 
34 | normals = normals[None,:,:] # [num_vertices, 3] -> [batch_size=1, num_vertices, 3] 35 | 36 | image_width = 640 37 | image_height = 480 38 | 39 | target_render = torch.tensor( 40 | io.imread(args.filename_target).astype(float) / 255.0 41 | )[None,:,:,:] # [image_width, image_height, 4] -> [batch_size=1, image_width, image_height, 4] 42 | 43 | # create a diffuse colors tensor coloring all vertices white 44 | vertex_diffuse_colors = torch.ones_like(vertices, dtype=torch.float32) 45 | 46 | light_positions = torch.tensor([[[0.0, 3.0, 0.0]]], dtype=torch.float32) 47 | light_intensities = torch.ones([1, 1, 3], dtype=torch.float32) 48 | 49 | # camera position: 50 | # initial_eye = torch.tensor([0.0, 2.0, 3.0], dtype=torch.float32) 51 | # initial_world_up = torch.tensor([0.0, 3.0, -2.0], dtype=torch.float32) 52 | initial_eye = torch.tensor([0.0, 3.0, 3.0], dtype=torch.float32) 53 | initial_world_up = torch.tensor([0.0, 3.0, -3.0], dtype=torch.float32) 54 | eye = torch.tensor(initial_eye[None,:], dtype=torch.float32, requires_grad=True) 55 | camera_euler_angles = torch.tensor([[0.0, 0.0, 0.0]], dtype=torch.float32, requires_grad=False) 56 | 57 | writer = imageio.get_writer(args.filename_output, fps=20) 58 | optimizer = torch.optim.SGD([eye, camera_euler_angles], 0.7, 0.1) 59 | def stepfn(): 60 | optimizer.zero_grad() 61 | 62 | camera_euler_transforms = camera_utils.euler_matrices(camera_euler_angles)[0, :3, :3] # [3, 3] 63 | forward = torch.reshape(torch.matmul(-initial_eye, camera_euler_transforms.T), [1, 3]) 64 | world_up = torch.reshape(torch.matmul(initial_world_up, camera_euler_transforms.T), [1, 3]) 65 | center = eye + forward 66 | render = mr.render( 67 | vertices, triangles, normals, 68 | vertex_diffuse_colors, eye, center, world_up, light_positions, 69 | light_intensities, image_width, image_height) 70 | 71 | # write to GIF output 72 | frame = render[0].detach().numpy() # [image_height, image_width, 4] 73 | # black background 74 | frame = np.concatenate([ 75 | frame[:,:,:3]*frame[:,:,3][:,:,None], 76 | np.ones([image_height, image_width, 1], dtype=np.float32) 77 | ], axis=-1) 78 | writer.append_data((255*frame).astype(np.uint8)) 79 | 80 | loss = torch.mean(torch.abs(render - target_render)) 81 | loss.backward() 82 | torch.nn.utils.clip_grad_norm_([eye, center, world_up], 1.0) 83 | return loss 84 | 85 | epochs = 50 86 | loss_points = [] 87 | for e in range(epochs): 88 | print("step {} of {}".format(e, epochs)) 89 | loss = optimizer.step(stepfn) 90 | loss_points.append(float(loss)) 91 | 92 | writer.close() 93 | 94 | x = np.arange(0, epochs, 1) 95 | y = np.array(loss_points) 96 | plt.plot(x, y) 97 | plt.show() 98 | 99 | 100 | -------------------------------------------------------------------------------- /src/examples/example4_target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/examples/example4_target.png -------------------------------------------------------------------------------- /src/examples/example5.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/examples/example5.mp4 -------------------------------------------------------------------------------- /src/examples/example5.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example 5: 
Optimizing rotation of a cube. 3 | """ 4 | 5 | import os 6 | import argparse 7 | 8 | import torch 9 | import numpy as np 10 | from skimage import io 11 | import imageio 12 | import matplotlib.pyplot as plt 13 | 14 | from .. import mesh_renderer as mr 15 | from ..common import camera_utils, shapes 16 | 17 | current_dir = os.path.dirname(os.path.realpath(__file__)) 18 | data_dir = os.path.join(current_dir, '.') 19 | 20 | if __name__ == "__main__": 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument('-t', '--filename_target', type=str, default=os.path.join(data_dir, '../mesh_renderer/test_data/Gray_Cube_0.png')) 23 | parser.add_argument('-o', '--filename_output', type=str, default=os.path.join(data_dir, 'example5.mp4')) 24 | args = parser.parse_args() 25 | 26 | image_width = 640 27 | image_height = 480 28 | 29 | # Set up a basic cube centered at the origin, with vertex normals pointing 30 | # outwards along the line from the origin to the cube vertices: 31 | cube_vertices, cube_triangles, cube_normals = shapes.cube(2.) 32 | cube_triangles = torch.flip(cube_triangles, [1]) # CCW -> CW 33 | 34 | initial_euler_angles = [[0.0, 0.0, 0.0]] 35 | euler_angles = torch.tensor(initial_euler_angles, requires_grad=True) 36 | 37 | def render_cube_with_rotation(input_euler_angles): 38 | model_rotation = camera_utils.euler_matrices(input_euler_angles)[0, :3, :3] # [3, 3] 39 | 40 | vertices_world_space = torch.reshape( 41 | torch.matmul(cube_vertices, model_rotation.T), 42 | [1, 8, 3]) 43 | 44 | normals_world_space = torch.reshape( 45 | torch.matmul(cube_normals, model_rotation.T), 46 | [1, 8, 3]) 47 | 48 | # camera position: 49 | eye = torch.tensor([[0.0, 0.0, 6.0]], dtype=torch.float32) 50 | center = torch.tensor([[0.0, 0.0, 0.0]], dtype=torch.float32) 51 | world_up = torch.tensor([[0.0, 1.0, 0.0]], dtype=torch.float32) 52 | 53 | vertex_diffuse_colors = torch.ones_like(vertices_world_space, dtype=torch.float32) 54 | light_positions = torch.reshape(eye, [1, 1, 3]) 55 | light_intensities = torch.ones([1, 1, 3], dtype=torch.float32) 56 | 57 | render = mr.render( 58 | vertices_world_space, cube_triangles, normals_world_space, 59 | vertex_diffuse_colors, eye, center, world_up, light_positions, 60 | light_intensities, image_width, image_height) 61 | render = torch.reshape(render, [image_height, image_width, 4]) 62 | return render 63 | 64 | target_render = torch.tensor( 65 | io.imread(args.filename_target).astype(float) / 255.0 66 | ) # [image_width, image_height, 4] 67 | 68 | writer = imageio.get_writer(args.filename_output, fps=20) 69 | optimizer = torch.optim.SGD([euler_angles], 0.7, 0.1) 70 | def stepfn(): 71 | optimizer.zero_grad() 72 | render = render_cube_with_rotation(euler_angles) 73 | 74 | # write to GIF output 75 | frame = render.detach().numpy() # [image_height, image_width, 4] 76 | # black background 77 | frame = np.concatenate([ 78 | frame[:,:,:3]*frame[:,:,3][:,:,None], 79 | np.ones([image_height, image_width, 1], dtype=np.float32) 80 | ], axis=-1) 81 | writer.append_data((255*frame).astype(np.uint8)) 82 | 83 | loss = torch.mean(torch.abs(render - target_render)) 84 | loss.backward() 85 | torch.nn.utils.clip_grad_norm_([euler_angles], 1.0) 86 | return loss 87 | 88 | epochs = 50 89 | loss_points = [] 90 | for e in range(epochs): 91 | print("step {} of {}".format(e, epochs)) 92 | loss = optimizer.step(stepfn) 93 | loss_points.append(float(loss)) 94 | 95 | writer.close() 96 | 97 | x = np.arange(0, epochs, 1) 98 | y = np.array(loss_points) 99 | plt.plot(x, y) 100 | plt.show() 101 | 
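# (The positional arguments to torch.optim.SGD above are lr=0.7 and momentum=0.1;
# the same recipe is used by the other optimization examples in this directory.)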
-------------------------------------------------------------------------------- /src/examples/example5b.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/examples/example5b.mp4 -------------------------------------------------------------------------------- /src/examples/example5b.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example 5b: Optimizing rotation of a cube with the soft rasterizer. 3 | """ 4 | 5 | import os 6 | import argparse 7 | 8 | import torch 9 | import numpy as np 10 | from skimage import io 11 | import imageio 12 | import matplotlib.pyplot as plt 13 | 14 | from .. import soft_mesh_renderer as smr 15 | from ..common import camera_utils, shapes 16 | 17 | current_dir = os.path.dirname(os.path.realpath(__file__)) 18 | data_dir = os.path.join(current_dir, '.') 19 | 20 | if __name__ == "__main__": 21 | parser = argparse.ArgumentParser() 22 | 23 | parser.add_argument('-t', '--filename_target', type=str, default=os.path.join(data_dir, 'example5b_target.png')) 24 | parser.add_argument('-o', '--filename_output', type=str, default=os.path.join(data_dir, 'example5b.mp4')) 25 | args = parser.parse_args() 26 | 27 | image_width = 100 28 | image_height = 100 29 | 30 | # Set up a basic cube centered at the origin, with vertex normals pointing 31 | # outwards along the line from the origin to the cube vertices: 32 | cube_vertices, cube_triangles, _ = shapes.cube(2.) 33 | 34 | initial_euler_angles = [[0.0, 0.0, 0.0]] 35 | euler_angles = torch.tensor(initial_euler_angles, requires_grad=True) 36 | 37 | def render_cube_with_rotation(input_euler_angles): 38 | model_rotation = camera_utils.euler_matrices(input_euler_angles)[0, :3, :3] # [3, 3] 39 | 40 | vertices_world_space = torch.reshape( 41 | torch.matmul(cube_vertices, model_rotation.T), 42 | [1, 8, 3]) 43 | 44 | # camera position: 45 | eye = torch.tensor([[0.0, 0.0, 6.0]], dtype=torch.float32) 46 | center = torch.tensor([[0.0, 0.0, 0.0]], dtype=torch.float32) 47 | world_up = torch.tensor([[0.0, 1.0, 0.0]], dtype=torch.float32) 48 | 49 | vertex_diffuse_colors = torch.ones_like(vertices_world_space, dtype=torch.float32) 50 | light_positions = torch.reshape(eye, [1, 1, 3]) 51 | light_intensities = torch.ones([1, 1], dtype=torch.float32) 52 | 53 | render = smr.render( 54 | vertices_world_space, 55 | cube_triangles, 56 | vertex_diffuse_colors, 57 | eye, 58 | center, 59 | world_up, 60 | light_positions, 61 | light_intensities, 62 | image_width, 63 | image_height, 64 | ) 65 | render = torch.reshape(render, [image_height, image_width, 4]) 66 | return render 67 | 68 | """ 69 | Target was generated with: 70 | ``` 71 | target_euler_angles = torch.tensor([[-20.0, 0.0, 60.0]]) 72 | ``` 73 | """ 74 | target_render = torch.tensor( 75 | io.imread(args.filename_target).astype(float) / 255.0 76 | ) # [image_width, image_height, 4] 77 | 78 | writer = imageio.get_writer(args.filename_output, fps=20) 79 | optimizer = torch.optim.SGD([euler_angles], 0.7, 0.1) 80 | def stepfn(): 81 | optimizer.zero_grad() 82 | render = render_cube_with_rotation(euler_angles) 83 | 84 | # write to GIF output 85 | frame = render.detach().numpy() # [image_height, image_width, 4] 86 | # black background 87 | frame = np.concatenate([ 88 | frame[:,:,:3]*frame[:,:,3][:,:,None], 89 | np.ones([image_height, image_width, 1], dtype=np.float32) 90 | ], axis=-1) 91 | 
writer.append_data((255*frame).astype(np.uint8)) 92 | 93 | loss = torch.mean(torch.abs(render - target_render)) 94 | loss.backward() 95 | torch.nn.utils.clip_grad_norm_([euler_angles], 1.0) 96 | return loss 97 | 98 | epochs = 50 99 | loss_points = [] 100 | for e in range(epochs): 101 | print("step {} of {}".format(e, epochs)) 102 | loss = optimizer.step(stepfn) 103 | loss_points.append(float(loss)) 104 | 105 | writer.close() 106 | 107 | x = np.arange(0, epochs, 1) 108 | y = np.array(loss_points) 109 | plt.plot(x, y) 110 | plt.show() 111 | -------------------------------------------------------------------------------- /src/examples/example5b_target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/examples/example5b_target.png -------------------------------------------------------------------------------- /src/examples/example6.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/examples/example6.mp4 -------------------------------------------------------------------------------- /src/examples/example6.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example 6: Optimizing rotation of a teapot. 3 | 4 | This example converges for small perturbations in rotation but not larger perturbations 5 | using the barycentric-differentiable renderer. 6 | """ 7 | 8 | import os 9 | import argparse 10 | 11 | import torch 12 | import numpy as np 13 | from skimage import io 14 | import imageio 15 | import matplotlib.pyplot as plt 16 | 17 | from .. import mesh_renderer as mr 18 | from ..common import obj_utils 19 | from ..common import camera_utils 20 | 21 | current_dir = os.path.dirname(os.path.realpath(__file__)) 22 | data_dir = os.path.join(current_dir, '.') 23 | 24 | if __name__ == "__main__": 25 | parser = argparse.ArgumentParser() 26 | parser.add_argument('-i', '--filename_input', type=str, default=os.path.join(data_dir, 'teapot.obj')) 27 | parser.add_argument('-t', '--filename_target', type=str, default=os.path.join(data_dir, 'example6_target.png')) 28 | parser.add_argument('-o', '--filename_output', type=str, default=os.path.join(data_dir, 'example6.mp4')) 29 | args = parser.parse_args() 30 | 31 | image_width = 640 32 | image_height = 480 33 | 34 | # load obj file 35 | vertices, triangles, normals = obj_utils.load_obj(args.filename_input) 36 | vertices = vertices[None,:,:] # [num_vertices, 3] -> [batch_size=1, num_vertices, 3] 37 | # TODO why are triangles not batched? 
38 | normals = normals[None,:,:] # [num_vertices, 3] -> [batch_size=1, num_vertices, 3] 39 | 40 | # camera position: 41 | eye = torch.tensor([[0.0, 3.0, 3.0]], dtype=torch.float32) 42 | center = torch.tensor([[0.0, 0.0, 0.0]], dtype=torch.float32) 43 | world_up = torch.tensor([0.0, np.cos(-np.pi/4.), np.sin(-np.pi/4.)], dtype=torch.float32) 44 | 45 | vertex_diffuse_colors = torch.ones_like(vertices, dtype=torch.float32) 46 | light_positions = torch.tensor([[[0.0, 3.0, 0.0]]], dtype=torch.float32) 47 | light_intensities = torch.ones([1, 1, 3], dtype=torch.float32) 48 | 49 | initial_euler_angles = [[np.pi/4., 0., 0.]] 50 | euler_angles = torch.tensor(initial_euler_angles, requires_grad=True) 51 | 52 | def render_with_rotation(input_euler_angles): 53 | model_rotation = camera_utils.euler_matrices(input_euler_angles)[0, :3, :3] # [3, 3] 54 | 55 | vertices_world_space = torch.matmul(vertices, model_rotation.T) 56 | # normals must be transformed using the inverse of the transpose of a matrix M 57 | normals_world_space = torch.matmul(normals, torch.inverse(model_rotation.T).T) 58 | 59 | render = mr.render( 60 | vertices_world_space, triangles, normals_world_space, 61 | vertex_diffuse_colors, eye, center, world_up, light_positions, 62 | light_intensities, image_width, image_height) 63 | render = torch.reshape(render, [image_height, image_width, 4]) 64 | return render 65 | 66 | target_render = torch.tensor( 67 | io.imread(args.filename_target).astype(float) / 255.0 68 | ) # [image_width, image_height, 4] 69 | 70 | writer = imageio.get_writer(args.filename_output, fps=20) 71 | optimizer = torch.optim.SGD([euler_angles], 0.7, 0.1) 72 | def stepfn(): 73 | optimizer.zero_grad() 74 | render = render_with_rotation(euler_angles) 75 | 76 | # write to GIF output 77 | frame = render.detach().numpy() # [image_height, image_width, 4] 78 | # black background 79 | frame = np.concatenate([ 80 | frame[:,:,:3]*frame[:,:,3][:,:,None], 81 | np.ones([image_height, image_width, 1], dtype=np.float32) 82 | ], axis=-1) 83 | writer.append_data((255*frame).astype(np.uint8)) 84 | 85 | loss = torch.mean(torch.abs(render - target_render)) 86 | loss.backward() 87 | torch.nn.utils.clip_grad_norm_([euler_angles], 1.0) 88 | return loss 89 | 90 | epochs = 50 91 | loss_points = [] 92 | for e in range(epochs): 93 | print("step {} of {}".format(e, epochs)) 94 | loss = optimizer.step(stepfn) 95 | loss_points.append(float(loss)) 96 | 97 | writer.close() 98 | 99 | x = np.arange(0, epochs, 1) 100 | y = np.array(loss_points) 101 | plt.plot(x, y) 102 | plt.show() 103 | -------------------------------------------------------------------------------- /src/examples/example6_target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/examples/example6_target.png -------------------------------------------------------------------------------- /src/examples/example6b.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/examples/example6b.mp4 -------------------------------------------------------------------------------- /src/examples/example6b.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example 6b: Optimizing rotation of a teapot with the soft mesh renderer. 
3 | 4 | This example converges for small perturbations in rotation but not larger perturbations 5 | using the barycentric-differentiable renderer. The soft renderer is able to robustly 6 | optimize larger perturbations than the barycentric renderer. 7 | """ 8 | 9 | import os 10 | import argparse 11 | 12 | import torch 13 | import numpy as np 14 | from skimage import io 15 | import imageio 16 | import matplotlib.pyplot as plt 17 | 18 | from .. import soft_mesh_renderer as smr 19 | from ..common import obj_utils 20 | from ..common import camera_utils 21 | 22 | current_dir = os.path.dirname(os.path.realpath(__file__)) 23 | data_dir = os.path.join(current_dir, '.') 24 | 25 | if __name__ == "__main__": 26 | parser = argparse.ArgumentParser() 27 | parser.add_argument('-i', '--filename_input', type=str, default=os.path.join(data_dir, 'teapot.obj')) 28 | parser.add_argument('-t', '--filename_target', type=str, default=os.path.join(data_dir, 'example6b_target.png')) 29 | parser.add_argument('-o', '--filename_output', type=str, default=os.path.join(data_dir, 'example6b.mp4')) 30 | args = parser.parse_args() 31 | 32 | image_width = 100 33 | image_height = 100 34 | 35 | # load obj file 36 | vertices, triangles, _ = obj_utils.load_obj(args.filename_input) 37 | vertices = vertices[None,:,:] # [num_vertices, 3] -> [batch_size=1, num_vertices, 3] 38 | # TODO why are triangles not batched? 39 | 40 | # camera position: 41 | eye = torch.tensor([[0.0, 3.0, 3.0]], dtype=torch.float32) 42 | center = torch.tensor([[0.0, 0.0, 0.0]], dtype=torch.float32) 43 | world_up = torch.tensor([0.0, np.cos(-np.pi/4.), np.sin(-np.pi/4.)], dtype=torch.float32) 44 | 45 | vertex_diffuse_colors = torch.ones_like(vertices, dtype=torch.float32) 46 | light_positions = torch.tensor([[[0.0, 3.0, 0.0]]], dtype=torch.float32) 47 | light_intensities = torch.ones([1, 1], dtype=torch.float32) 48 | 49 | initial_euler_angles = [[np.pi/4., 0., 0.]] 50 | euler_angles = torch.tensor(initial_euler_angles, requires_grad=True) 51 | 52 | def render_with_rotation(input_euler_angles): 53 | model_rotation = camera_utils.euler_matrices(input_euler_angles)[0, :3, :3] # [3, 3] 54 | vertices_world_space = torch.matmul(vertices, model_rotation.T) 55 | 56 | render = smr.render( 57 | vertices_world_space, 58 | triangles, 59 | vertex_diffuse_colors, 60 | eye, 61 | center, 62 | world_up, 63 | light_positions, 64 | light_intensities, 65 | image_width, 66 | image_height 67 | ) 68 | render = torch.reshape(render, [image_height, image_width, 4]) 69 | return render 70 | 71 | """ 72 | Target was generated with: 73 | ``` 74 | target_euler_angles = torch.tensor([[0.0, 0.0, 0.0]]) 75 | ``` 76 | """ 77 | target_render = torch.tensor( 78 | io.imread(args.filename_target).astype(float) / 255.0 79 | ) # [image_width, image_height, 4] 80 | 81 | writer = imageio.get_writer(args.filename_output, fps=20) 82 | optimizer = torch.optim.SGD([euler_angles], 0.7, 0.1) 83 | def stepfn(): 84 | optimizer.zero_grad() 85 | render = render_with_rotation(euler_angles) 86 | 87 | # write to GIF output 88 | frame = render.detach().numpy() # [image_height, image_width, 4] 89 | # black background 90 | frame = np.concatenate([ 91 | frame[:,:,:3]*frame[:,:,3][:,:,None], 92 | np.ones([image_height, image_width, 1], dtype=np.float32) 93 | ], axis=-1) 94 | writer.append_data((255*frame).astype(np.uint8)) 95 | 96 | loss = torch.mean(torch.abs(render - target_render)) 97 | loss.backward() 98 | torch.nn.utils.clip_grad_norm_([euler_angles], 1.0) 99 | return loss 100 | 101 | epochs = 50 102 | 
loss_points = [] 103 | for e in range(epochs): 104 | print("step {} of {}".format(e, epochs)) 105 | loss = optimizer.step(stepfn) 106 | loss_points.append(float(loss)) 107 | 108 | writer.close() 109 | 110 | x = np.arange(0, epochs, 1) 111 | y = np.array(loss_points) 112 | plt.plot(x, y) 113 | plt.show() 114 | -------------------------------------------------------------------------------- /src/examples/example6b_target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/examples/example6b_target.png -------------------------------------------------------------------------------- /src/examples/example7b.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example 7: Fitting sphere vertices to a cow. 3 | """ 4 | 5 | import os 6 | import argparse 7 | 8 | import torch 9 | import numpy as np 10 | from skimage import io 11 | import imageio 12 | import matplotlib.pyplot as plt 13 | 14 | from .. import soft_mesh_renderer as smr 15 | from ..common import shapes, obj_utils 16 | 17 | current_dir = os.path.dirname(os.path.realpath(__file__)) 18 | data_dir = os.path.join(current_dir, '.') 19 | 20 | # From PyTorch3D: 21 | # https://github.com/facebookresearch/pytorch3d/blob/main/pytorch3d/ops/laplacian_matrices.py 22 | # 23 | # Note the laplacian depends only on the topology of a mesh and can be 24 | # considered constant if the topology is fixed. 25 | def compute_laplacian(verts: torch.Tensor, edges: torch.Tensor) -> torch.Tensor: 26 | """ 27 | Computes the laplacian matrix. 28 | The definition of the laplacian is 29 | L[i, j] = -1 , if i == j 30 | L[i, j] = 1 / deg(i) , if (i, j) is an edge 31 | L[i, j] = 0 , otherwise 32 | where deg(i) is the degree of the i-th vertex in the graph. 33 | 34 | Args: 35 | verts: tensor of shape (V, 3) containing the vertices of the graph 36 | edges: tensor of shape (E, 2) containing the vertex indices of each edge 37 | Returns: 38 | L: Sparse FloatTensor of shape (V, V) 39 | """ 40 | edges = edges.long() 41 | V = verts.shape[0] 42 | 43 | e0, e1 = edges.unbind(1) 44 | 45 | idx01 = torch.stack([e0, e1], dim=1) # (E, 2) 46 | idx10 = torch.stack([e1, e0], dim=1) # (E, 2) 47 | idx = torch.cat([idx01, idx10], dim=0).t() # (2, 2*E) 48 | 49 | # First, we construct the adjacency matrix, 50 | # i.e. A[i, j] = 1 if (i,j) is an edge, or 51 | # A[e0, e1] = 1 & A[e1, e0] = 1 52 | ones = torch.ones(idx.shape[1], dtype=torch.float32, device=verts.device) 53 | # pyre-fixme[16]: Module `sparse` has no attribute `FloatTensor`. 54 | A = torch.sparse.FloatTensor(idx, ones, (V, V)) 55 | 56 | # the sum of i-th row of A gives the degree of the i-th vertex 57 | deg = torch.sparse.sum(A, dim=1).to_dense() 58 | 59 | # We construct the Laplacian matrix by adding the non diagonal values 60 | # i.e. L[i, j] = 1 ./ deg(i) if (i, j) is an edge 61 | deg0 = deg[e0] 62 | # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`. 63 | deg0 = torch.where(deg0 > 0.0, 1.0 / deg0, deg0) 64 | deg1 = deg[e1] 65 | # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`. 66 | deg1 = torch.where(deg1 > 0.0, 1.0 / deg1, deg1) 67 | val = torch.cat([deg0, deg1]) 68 | # pyre-fixme[16]: Module `sparse` has no attribute `FloatTensor`. 69 | L = torch.sparse.FloatTensor(idx, val, (V, V)) 70 | 71 | # Then we add the diagonal values L[i, i] = -1. 
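    # For example, a single triangle (V = 3, every vertex pair connected) has
    # deg(i) = 2 for all i, so the finished matrix is
    #   [[-1.0,  0.5,  0.5],
    #    [ 0.5, -1.0,  0.5],
    #    [ 0.5,  0.5, -1.0]]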
72 | idx = torch.arange(V, device=verts.device) 73 | idx = torch.stack([idx, idx], dim=0) 74 | ones = torch.ones(idx.shape[1], dtype=torch.float32, device=verts.device) 75 | # pyre-fixme[16]: Module `sparse` has no attribute `FloatTensor`. 76 | L -= torch.sparse.FloatTensor(idx, ones, (V, V)) 77 | 78 | return L 79 | 80 | def compute_edges_list(vertices, faces): 81 | """ 82 | Computes the edges of a mesh from its vertices and faces. 83 | Args: 84 | vertices: tensor of shape (V, 3) containing the vertices of the mesh 85 | faces: tensor of shape (F, 3) containing the vertex indices of each face 86 | Returns: 87 | edges: tensor of shape (E, 2) containing the vertex indices of each edge 88 | """ 89 | faces = faces.to(vertices.device) 90 | # pyre-fixme[16]: Module `torch` has no attribute `cat`. 91 | edges = torch.cat( 92 | [ 93 | faces[:, :2], 94 | faces[:, 1:], 95 | faces[:, ::2], 96 | ] 97 | ) 98 | edges = edges.view(-1, 2) 99 | edges = torch.unique(edges, dim=0) 100 | return edges 101 | 102 | def mesh_laplacian_smoothing_loss(vertices, laplacian): 103 | """ 104 | Computes the uniform weight laplacian smoothing objective for a single mesh (unbatched). 105 | Args: 106 | vertices: tensor of shape (V, 3) containing the vertices of the mesh 107 | laplacian: tensor of shape (V, V) containing the laplacian matrix of the mesh 108 | Returns: 109 | loss: the laplacian smoothing loss 110 | """ 111 | weight = 1.0 / (vertices.shape[0]) 112 | loss = laplacian.mm(vertices) 113 | loss = loss.norm(dim=1) 114 | loss = loss * weight 115 | return loss.sum() 116 | 117 | def mesh_edge_loss(vertices, edges): 118 | """ 119 | Computes the edge length loss for a single mesh (unbatched). 120 | Args: 121 | vertices: tensor of shape (V, 3) containing the vertices of the mesh 122 | edges: tensor of shape (E, 2) containing the vertex indices of each edge 123 | Returns: 124 | loss: the edge length loss 125 | """ 126 | v0 = vertices[edges[:, 0]] 127 | v1 = vertices[edges[:, 1]] 128 | loss = (v0 - v1).norm(dim=1, p=2) 129 | return loss.mean() 130 | 131 | if __name__ == "__main__": 132 | parser = argparse.ArgumentParser() 133 | parser.add_argument('-t1', '--target_image1', type=str, default=os.path.join(data_dir, 'example7b_target1.png')) 134 | parser.add_argument('-t2', '--target_image2', type=str, default=os.path.join(data_dir, 'example7b_target2.png')) 135 | parser.add_argument('-t3', '--target_image3', type=str, default=os.path.join(data_dir, 'example7b_target3.png')) 136 | parser.add_argument('-t4', '--target_image4', type=str, default=os.path.join(data_dir, 'example7b_target4.png')) 137 | parser.add_argument('-o', '--output_model', type=str, default=os.path.join(data_dir, 'example7b.obj')) 138 | parser.add_argument('-v', '--output_video', type=str, default=os.path.join(data_dir, 'example7b.mp4')) 139 | parser.add_argument('-p', '--output_previews_dir', type=str, default=os.path.join(data_dir, 'example7b_previews')) 140 | args = parser.parse_args() 141 | 142 | # load obj file 143 | sphere_resolution = 20 144 | vertices, triangles, _ = shapes.sphere(1., resolution=sphere_resolution) 145 | edges = compute_edges_list(vertices, triangles) 146 | laplacian = compute_laplacian(vertices, edges) 147 | 148 | vertices.requires_grad = True 149 | 150 | # camera positions: 151 | eye = torch.tensor([ 152 | [0.0, 0.0, -3.0], 153 | [3.0, 0.0, 0.0], 154 | [-3.0, 0.0, 0.0], 155 | [0.0, 0.0, 3.0], 156 | ], dtype=torch.float32) 157 | center = torch.zeros_like(eye) 158 | world_up = torch.tensor([ 159 | [0.0, 1.0, 0.0], 160 | [0.0, 1.0, 0.0], 
161 |         [0.0, 1.0, 0.0],
162 |         [0.0, 1.0, 0.0],
163 |     ], dtype=torch.float32)
164 |
165 |     light_positions = torch.tensor([
166 |         [
167 |             [0.0, 0.0, -3.0],
168 |             [0.0, 3.0, 0.0],
169 |             [0.0, 0.0, 3.0],
170 |         ],
171 |         [
172 |             [0.0, 0.0, -3.0],
173 |             [0.0, 3.0, 0.0],
174 |             [0.0, 0.0, 3.0],
175 |         ],
176 |         [
177 |             [0.0, 0.0, -3.0],
178 |             [0.0, 3.0, 0.0],
179 |             [0.0, 0.0, 3.0],
180 |         ],
181 |         [
182 |             [0.0, 0.0, -3.0],
183 |             [0.0, 3.0, 0.0],
184 |             [0.0, 0.0, 3.0],
185 |         ],
186 |     ], dtype=torch.float32)
187 |     light_intensities = torch.ones([4, 3], dtype=torch.float32)
188 |
189 |     # Create a diffuse colors tensor coloring all vertices white
190 |     vertex_diffuse_colors = torch.ones([4, vertices.shape[0], 3], dtype=torch.float32)
191 |
192 |     image_width = 96
193 |     image_height = 96
194 |
195 |     target_render1 = torch.tensor(
196 |         io.imread(args.target_image1).astype(float) / 255.0
197 |     ) # [image_height, image_width, 4]
198 |     target_render2 = torch.tensor(
199 |         io.imread(args.target_image2).astype(float) / 255.0
200 |     ) # [image_height, image_width, 4]
201 |     target_render3 = torch.tensor(
202 |         io.imread(args.target_image3).astype(float) / 255.0
203 |     ) # [image_height, image_width, 4]
204 |     target_render4 = torch.tensor(
205 |         io.imread(args.target_image4).astype(float) / 255.0
206 |     ) # [image_height, image_width, 4]
207 |     target_renders = torch.stack([target_render1, target_render2, target_render3, target_render4], dim=0)
208 |
209 |     epochs_between_frames = 10
210 |     epochs_between_previews = 100
211 |
212 |     writer = imageio.get_writer(args.output_video, fps=20 / epochs_between_frames)
213 |     sigma_val = 1e-4
214 |     blur_radius = 0.1
215 |     edge_loss_weight = 0.1
216 |     laplacian_loss_weight = 0.1
217 |     lr = 4.0
218 |     momentum = 0.1
219 |     optimizer = torch.optim.SGD([vertices], lr, momentum)
220 |     def stepfn(e):
221 |         optimizer.zero_grad()
222 |
223 |         # We need to re-create this tensor from `vertices` each run to
224 |         # ensure it gets changes from optimizer updates.
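        # (torch.stack builds a fresh graph node referencing the `vertices` leaf,
        # so the batched copies pick up the latest parameter values while
        # gradients still accumulate back into `vertices`.)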
225 |         batched_vertices = torch.stack([vertices]*4, dim=0)
226 |         batched_renders = smr.render(
227 |             batched_vertices,
228 |             triangles,
229 |             vertex_diffuse_colors,
230 |             eye,
231 |             center,
232 |             world_up,
233 |             light_positions,
234 |             light_intensities,
235 |             image_width,
236 |             image_height,
237 |             sigma_val=sigma_val,
238 |             fov_y=60.0,
239 |             blur_radius=blur_radius
240 |         )
241 |
242 |         loss = torch.mean((batched_renders[..., 3] - target_renders[..., 3])**2)
243 |         loss += mesh_edge_loss(vertices, edges) * edge_loss_weight
244 |         loss += mesh_laplacian_smoothing_loss(vertices, laplacian) * laplacian_loss_weight
245 |
246 |         loss.backward()
247 |         torch.nn.utils.clip_grad_norm_([vertices], 1.0)
248 |
249 |         render = torch.reshape(batched_renders[0], [image_height, image_width, 4])
250 |         if e % epochs_between_frames == 0:
251 |             # write to video output
252 |             frame = render.detach().numpy() # [image_height, image_width, 4]
253 |             # black background
254 |             frame = np.concatenate([
255 |                 frame[:,:,:3]*frame[:,:,3][:,:,None],
256 |                 np.ones([image_height, image_width, 1], dtype=np.float32)
257 |             ], axis=-1)
258 |             writer.append_data((255*frame).astype(np.uint8))
259 |
260 |             print("\nappended frame {} to video output\n".format(e // epochs_between_frames))
261 |         if e % epochs_between_previews == 0:
262 |             # write a preview image to the preview directory
263 |             preview_image_path = os.path.join(args.output_previews_dir, "preview_{:04d}.png".format(e))
264 |             preview_obj_path = os.path.join(args.output_previews_dir, "preview_{:04d}.obj".format(e))
265 |             result_image = render.detach().numpy()
266 |             # Binarize the alpha channel to 0 or 1. In the raw output of the soft renderer,
267 |             # it represents the probability that a triangle occupies the pixel. This will be
268 |             # less than 1.0 for any pixel which is not entirely covered by a triangle, even if
269 |             # the pixel is technically completely covered when considering all triangles. If we
270 |             # don't binarize the value, we will get seams in the output along triangle edges.
271 |             result_image[..., 3] = 1.0 * (result_image[..., 3] > 0.0)
272 |             result_image = np.clip(result_image, 0., 1.).copy(order="C")
273 |             io.imsave(preview_image_path, (result_image * 255.0).astype(np.uint8))
274 |
275 |             obj_utils.save_obj(preview_obj_path, vertices, triangles)
276 |
277 |             print("\nsaved previews to {} and {}\n".format(preview_image_path, preview_obj_path))
278 |
279 |         return loss
280 |
281 |     epochs = 1000
282 |     loss_points = []
283 |     for e in range(epochs):
284 |         print("\nstep {} of {}\n".format(e, epochs))
285 |         loss = optimizer.step(lambda: stepfn(e))
286 |         loss_points.append(float(loss))
287 |
288 |     writer.close()
289 |     obj_utils.save_obj(args.output_model, vertices, triangles)
290 |
291 |     x = np.arange(0, epochs, 1)
292 |     y = np.array(loss_points)
293 |     plt.plot(x, y)
294 |     plt.show()
295 |
296 |
--------------------------------------------------------------------------------
/src/examples/example7b_target1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/examples/example7b_target1.png
--------------------------------------------------------------------------------
/src/examples/example7b_target2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/examples/example7b_target2.png
--------------------------------------------------------------------------------
/src/examples/example7b_target3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/examples/example7b_target3.png
--------------------------------------------------------------------------------
/src/examples/example7b_target4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/examples/example7b_target4.png
--------------------------------------------------------------------------------
/src/mesh_renderer/README.md:
--------------------------------------------------------------------------------
1 | # mesh_renderer
2 |
3 | This package contains a differentiable, 3D mesh renderer using the barycentric formulation from Genova, Kyle, et al. "Unsupervised training for 3d morphable model regression." It is a port of Google's [tf_mesh_renderer](https://github.com/google/tf_mesh_renderer) to PyTorch.
4 |
5 | There is an optimized C++ implementation of this renderer available for use. To enable it, first install the kernel via `cd src/mesh_renderer/kernels && python setup.py install`, then change the hardcoded config variable `USE_CPP_RASTERIZER` as described in the Implementation notes section below.
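Concretely, that means running the following from the repository root:

```
cd src/mesh_renderer/kernels
python setup.py install
```

and then setting `USE_CPP_RASTERIZER = True` in `src/mesh_renderer/rasterize.py` so that the barycentric rasterizer dispatches to the compiled kernel.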
6 |
7 | # Testing
8 |
9 | To test the rasterizer module, run from the repository root:
10 | ```
11 | python -m src.mesh_renderer.rasterize_triangles_test
12 | ```
13 |
14 | To test the mesh renderer, run from the repository root:
15 |
16 | ```
17 | python -m src.mesh_renderer.mesh_renderer_test
18 | ```
19 |
20 | # Usage
21 |
22 | The mesh renderer provides a high-level API for rendering triangle meshes with shading and a low-level API for rasterizing batches of triangles. The APIs are mostly the same as those in [tf_mesh_renderer](https://github.com/google/tf_mesh_renderer) but adjusted for PyTorch.
23 |
24 | ## Rendering a shaded mesh
25 |
26 | ### `render`
27 |
28 | Rendering a shaded mesh can be done with the `render` function in `mesh_renderer/render.py` (exported as `mesh_renderer.render`). This function renders an input scene (mesh, lights, and camera) using Phong shading, and returns an output image.
29 |
30 | #### Args:
31 |
32 | - `vertices`: 3D float32 tensor with shape `[batch_size, vertex_count, 3]`. Each triplet is an xyz position in world space.
33 | - `triangles`: 2D int32 tensor with shape `[triangle_count, 3]`. Each triplet should contain vertex indices describing a triangle such that the triangle's normal points toward the viewer if the forward order of the triplet defines a clockwise winding of the vertices. Gradients with respect to this tensor are not available.
34 | - `normals`: 3D float32 tensor with shape `[batch_size, vertex_count, 3]`. Each triplet is the xyz vertex normal for its corresponding vertex. Each vector is assumed to be already normalized.
35 | - `diffuse_colors`: 3D float32 tensor with shape `[batch_size, vertex_count, 3]`. The RGB diffuse reflection in the range `[0, 1]` for each vertex.
36 | - `camera_position`: 2D tensor with shape `[batch_size, 3]` or 1D tensor with shape `[3]` specifying the XYZ world space camera position.
37 | - `camera_lookat`: 2D tensor with shape `[batch_size, 3]` or 1D tensor with shape `[3]` containing an XYZ point along the center of the camera's gaze.
38 | - `camera_up`: 2D tensor with shape `[batch_size, 3]` or 1D tensor with shape `[3]` containing the up direction for the camera. The camera will have no tilt with respect to this direction.
39 | - `light_positions`: a 3D tensor with shape `[batch_size, light_count, 3]`. The XYZ position of each light in the scene. In the same coordinate space as pixel_positions.
40 | - `light_intensities`: a 3D tensor with shape `[batch_size, light_count, 3]`. The RGB intensity values for each light. Intensities may be above 1.
41 | - `image_width`: int specifying desired output image width in pixels.
42 | - `image_height`: int specifying desired output image height in pixels.
43 | - `specular_colors`: (optional) 3D float32 tensor with shape `[batch_size, vertex_count, 3]`. The RGB specular reflection in the range `[0, 1]` for each vertex. If supplied, specular reflections will be computed, and both specular colors and shininess_coefficients are expected.
44 | - `shininess_coefficients`: (optional) a 0D-2D float32 tensor with maximum shape `[batch_size, vertex_count]`. The Phong shininess coefficient of each vertex. A 0D tensor or float gives a constant shininess coefficient for all vertices across all batches and images. A 1D tensor must have shape `[batch_size]`, and a single shininess coefficient per image is used.
45 | - `ambient_color`: (optional) a 2D tensor with shape `[batch_size, 3]`. The RGB ambient color, which is added to each pixel in the scene. If None, it is assumed to be black.
46 | - `fov_y`: (optional) float, 0D tensor, or 1D tensor with shape `[batch_size]` specifying desired output image y field of view in degrees.
47 | - `near_clip`: (optional) float, 0D tensor, or 1D tensor with shape `[batch_size]` specifying near clipping plane distance.
48 | - `far_clip`: (optional) float, 0D tensor, or 1D tensor with shape `[batch_size]` specifying far clipping plane distance.
49 |
50 |
51 | #### Returns:
52 |
53 | A 4D float32 tensor of shape `[batch_size, image_height, image_width, 4]` containing the lit RGBA color values for each image at each pixel. RGB colors are the intensity values before tonemapping and can be in the range `[0, infinity]`. Clipping to the range `[0, 1]` with `np.clip` is likely reasonable for both viewing and training most scenes. More complex scenes with multiple lights should tone map color values for display only. One simple tonemapping approach is to rescale color values as x/(1+x); gamma compression is another common technique. Alpha values are zero for background pixels and near one for mesh pixels.
54 |
55 | ### Example
56 |
57 | An example usage of the differentiable mesh renderer to render a cube, then optimize its rotation to match a target image can be seen in the `testThatCubeRotates` test case in `mesh_renderer_test.py`.
58 |
59 | ## Rasterizing triangles with arbitrary attributes
60 |
61 | ### `rasterize`
62 |
63 | This is a lower-level function which can be used to rasterize a batch of triangles into a tensor providing interpolated vertex attributes in each pixel. This could be useful if you want to build your own shading on top of the core rasterization module, for example.
64 |
65 | #### Args:
66 |
67 | - `world_space_vertices`: 3D float32 tensor of xyz positions with shape `[batch_size, vertex_count, 3]`.
68 | - `attributes`: 3D float32 tensor with shape `[batch_size, vertex_count, attribute_count]`. Each vertex attribute is interpolated across the triangle using barycentric interpolation.
69 | - `triangles`: 2D int32 tensor with shape `[triangle_count, 3]`. Each triplet should contain vertex indices describing a triangle such that the triangle's normal points toward the viewer if the forward order of the triplet defines a clockwise winding of the vertices. Gradients with respect to this tensor are not available.
70 | - `camera_matrices`: 3D float tensor with shape `[batch_size, 4, 4]` containing model-view-perspective projection matrices.
71 | - `image_width`: int specifying desired output image width in pixels.
72 | - `image_height`: int specifying desired output image height in pixels.
73 | - `background_value`: a 1D float32 tensor with shape `[attribute_count]`. Pixels that lie outside all triangles take this value.
74 |
75 | #### Returns:
76 |
77 | - A 4D float32 tensor with shape `[batch_size, image_height, image_width, attribute_count]`, containing the interpolated vertex attributes at each pixel.
78 |
79 | ### Example
80 |
81 | An example usage of the `rasterize` API to rasterize a cube can be found in the `testRendersTwoCubesInBatch` test case in `rasterize_triangles_test.py`.
82 |
83 | ## `camera_utils`
84 |
85 | This file contains some utilities that may be useful for transforming the input scene before rendering. The `render` function uses some of these functions internally to project the world-space vertices into camera-space. Model-view-perspective projection matrices are also required as input to the lower-level rasterization APIs.
86 |
87 | ### `euler_matrices`
88 |
89 | You can use this to create a Model matrix with rotation to transform a set of object-space vertices into world space before rendering it.
90 |
91 | #### Args:
92 |
93 | - `angles`: a `[batch_size, 3]` tensor containing X, Y, and Z angles in radians.
94 |
95 | #### Returns:
96 |
97 | - A `[batch_size, 4, 4]` tensor of matrices.
98 |
99 | ### `look_at`
100 |
101 | You can use this to compute a View matrix to transform a set of world-space vertices into eye space; this is primarily useful for the lower-level rasterization APIs which require an input View matrix.
102 |
103 | #### Args:
104 |
105 | - `eye`: 2D float32 tensor with shape `[batch_size, 3]` containing the XYZ world space position of the camera.
106 | - `center`: 2D float32 tensor with shape `[batch_size, 3]` containing a position along the center of the camera's gaze line.
107 | - `world_up`: 2D float32 tensor with shape `[batch_size, 3]` specifying the world's up direction; the output camera will have no tilt with respect to this direction.
108 |
109 | #### Returns:
110 |
111 | - A `[batch_size, 4, 4]` float tensor containing a right-handed camera extrinsics matrix that maps points from world space to points in eye space.
112 |
113 | # Implementation notes
114 |
115 |
116 | ## Rasterizer
117 |
118 | There are two implementations of the low-level `rasterize` API.
119 |
120 | ### C++ kernel
121 |
122 | This implementation is written in C++ for performance. Since it doesn't use PyTorch built-in functions under the hood and instead [extends `torch.autograd.Function`](https://pytorch.org/docs/stable/notes/extending.html#extending-autograd), the backward pass is explicitly written rather than just being implicit in the forward pass. Both are written in the [C++ extension](https://pytorch.org/tutorials/advanced/cpp_extension.html) in `src/mesh_renderer/kernels/rasterize_triangles.cpp`, with the wrapper code in `src/mesh_renderer/rasterize_triangles_ext.py`.
123 |
124 | This implementation is enabled by setting the hard-coded global variable `USE_CPP_RASTERIZER = True` in `src/mesh_renderer/rasterize.py`.
125 |
126 | ### Python-only kernel
127 |
128 | This implementation is written in Python only in `src/mesh_renderer/rasterize_triangles_python.py` and leverages PyTorch built-in functions for autograd. It's much shorter than the C++ kernel and is intended to be simpler to understand. However, performance is much worse.
129 |
130 | This implementation is enabled by setting the hard-coded global variable `USE_CPP_RASTERIZER = False` in `src/mesh_renderer/rasterize.py`. This is the default.
--------------------------------------------------------------------------------
/src/mesh_renderer/__init__.py:
--------------------------------------------------------------------------------
1 | from .render import render, tone_mapper
2 | from .rasterize import rasterize
3 |
4 | __version__ = '0.0.1'
5 | name = 'mesh_renderer'
--------------------------------------------------------------------------------
/src/mesh_renderer/kernels/rasterize_triangles.cpp:
--------------------------------------------------------------------------------
1 | #include <algorithm>
2 | #include <cmath>
3 | #include <vector>
4 |
5 | #include <torch/extension.h>
6 |
7 | namespace {
8 |
9 | // Threshold for a barycentric coordinate triplet's sum, below which the
10 | // coordinates at a pixel are deemed degenerate. Most such degenerate
11 | // triplets in an image will be exactly zero, as this is how pixels outside
12 | // the mesh are rendered.
13 | constexpr float kDegenerateBarycentricCoordinatesCutoff = 0.9f;
14 |
15 | }
16 |
17 | // Takes the maximum of a, b, and c, rounds up, and converts to an integer
18 | // in the range [low, high].
19 | inline int clamped_integer_max(float a, float b, float c, int low, int high) {
20 |   return std::min(
21 |       std::max(static_cast<int>(std::ceil(std::max(std::max(a, b), c))), low),
22 |       high);
23 | }
24 |
25 | // Takes the minimum of a, b, and c, rounds down, and converts to an integer
26 | // in the range [low, high].
27 | inline int clamped_integer_min(float a, float b, float c, int low, int high) {
28 |   return std::min(
29 |       std::max(static_cast<int>(std::floor(std::min(std::min(a, b), c))), low),
30 |       high);
31 | }
32 |
33 | // Compute the edge functions from M^-1 as described by Olano and Greer,
34 | // "Triangle Scan Conversion using 2D Homogeneous Coordinates."
35 | //
36 | // This function combines equations (3) and (4). It first computes
37 | // [a b c] = u_i * M^-1, where u_0 = [1 0 0], u_1 = [0 1 0], etc.,
38 | // then computes edge_i = aX + bY + c.
39 | void compute_edge_functions(const float px, const float py,
40 |                             const float m_inv[9], float values[3]) {
41 |   for (int i = 0; i < 3; ++i) {
42 |     const float a = m_inv[3 * i + 0];
43 |     const float b = m_inv[3 * i + 1];
44 |     const float c = m_inv[3 * i + 2];
45 |
46 |     values[i] = a * px + b * py + c;
47 |   }
48 | }
49 |
50 | // Compute a 3x3 matrix inverse without dividing by the determinant.
51 | // Instead, makes an unnormalized matrix inverse with the correct sign
52 | // by flipping the sign of the matrix if the determinant is negative.
53 | // By leaving out determinant division, the rows of M^-1 only depend on two out
54 | // of three of the columns of M; i.e., the first row of M^-1 only depends on the
55 | // second and third columns of M, the second only depends on the first and
56 | // third, etc. This means we can compute edge functions for two neighboring
57 | // triangles independently and produce exactly the same numerical result up
58 | // to the sign. This in turn means we can avoid cracks in rasterization without
59 | // using fixed-point arithmetic.
60 | // See http://mathworld.wolfram.com/MatrixInverse.html
61 | float compute_unnormalized_matrix_inverse(
62 |     const float a11, const float a12, const float a13,
63 |     const float a21, const float a22, const float a23,
64 |     const float a31, const float a32, const float a33, float m_inv[9]) {
65 |   m_inv[0] = a22 * a33 - a32 * a23;
66 |   m_inv[1] = a13 * a32 - a33 * a12;
67 |   m_inv[2] = a12 * a23 - a22 * a13;
68 |   m_inv[3] = a23 * a31 - a33 * a21;
69 |   m_inv[4] = a11 * a33 - a31 * a13;
70 |   m_inv[5] = a13 * a21 - a23 * a11;
71 |   m_inv[6] = a21 * a32 - a31 * a22;
72 |   m_inv[7] = a12 * a31 - a32 * a11;
73 |   m_inv[8] = a11 * a22 - a21 * a12;
74 |
75 |   // The first column of the unnormalized M^-1 contains intermediate values for
76 |   // det(M).
77 |   const float det = a11 * m_inv[0] + a12 * m_inv[3] + a13 * m_inv[6];
78 |
79 |   // Transfer the sign of the determinant.
80 |   if (det < 0.0f) {
81 |     for (int i = 0; i < 9; ++i) {
82 |       m_inv[i] = -m_inv[i];
83 |     }
84 |   }
85 |
86 |   return det;
87 | }
88 |
89 | // Determine whether the point p lies inside a front-facing triangle.
90 | // Count pixels exactly on an edge as inside the triangle, as long as the
91 | // triangle is not degenerate. Degenerate (zero-area) triangles always fail
92 | // the inside test.
93 | bool pixel_is_inside_triangle(const float edge_values[3]) {
94 |   // Check that the edge values are all non-negative and that at least one is
95 |   // positive (triangle is non-degenerate).
96 |   return (edge_values[0] >= 0 && edge_values[1] >= 0 && edge_values[2] >= 0) &&
97 |          (edge_values[0] > 0 || edge_values[1] > 0 || edge_values[2] > 0);
98 | }
99 |
100 | // Compute df_dvertices, the derivative of a scalar loss function with respect
101 | // to the vector of stacked vertex coordinates in XYZW clip space.
102 | //
103 | // Params:
104 | // df_dbarycentric_coords: A 3D float32 tensor with shape
105 | //   {image_height, image_width, 3}. The element at index [y, x, b] gives the
106 | //   partial derivative of the scalar loss function with respect to the bth
107 | //   barycentric coordinate of pixel coordinate (y, x).
108 | // vertices: A 2D float32 tensor with shape {vertex_count, 4}.
109 | //   Each quadruplet is the XYZW location of the vertex with that
110 | //   quadruplet's id. The coordinates are assumed to be OpenGL-style clip-space
111 | //   (i.e., post-projection, pre-divide), where X points right, Y points up,
112 | //   Z points away. Note Z here is the clip-space (z-buffer) depth and W is the
113 | //   world space depth.
114 | // triangles: A 2D int32 tensor with shape {triangle_count, 3}.
115 | //   Each triplet is the three vertex ids indexing into vertices
116 | //   describing one triangle with clockwise winding.
117 | // px_triangle_ids: A 2D tensor with shape {image_height, image_width}.
118 | //   Each pixel contains a triangle id in the range
119 | //   [0, triangle_count). The id value is also 0 if there is no triangle
120 | //   at the pixel; the px_barycentric_coordinates must be checked to distinguish
121 | //   between the two cases.
122 | // px_barycentric_coordinates: A 3D tensor with
123 | //   shape {image_height, image_width, 3}. Contains the triplet of
124 | //   barycentric coordinates at each pixel in the same vertex ordering as
125 | //   triangles. If no triangle is present, all coordinates are 0.
126 | //
127 | // Returns:
128 | // df_dvertices: A 2D tensor with shape {vertex_count, 4} giving the derivative
129 | //   of the scalar loss function f with respect to the vector of stacked vertex
130 | //   coordinates in XYZW clip space.
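// A sketch of the derivation: the normalized coordinates computed in the
// forward pass are b_i = e_i / (e_0 + e_1 + e_2), where e = M^-1 p. Applying
// the quotient rule to that expression with respect to the entries of M
// produces the two kinds of terms accumulated below: a -M^-1 term from the
// numerator and a d(e_0 + e_1 + e_2)/dM * b_i * b_j term from the
// denominator, with everything normalized by abs(det M) at the end.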
131 | std::vector<torch::Tensor> rasterize_triangles_backward(
132 |     const torch::Tensor &df_dbarycentric_coords,
133 |     const torch::Tensor &vertices,
134 |     const torch::Tensor &triangles,
135 |     const torch::Tensor &px_triangle_ids,
136 |     const torch::Tensor &px_barycentric_coords
137 | ) {
138 |   const int triangle_count = (int) triangles.size(0);
139 |   const int vertex_count = (int) vertices.size(0);
140 |   const int image_height = px_triangle_ids.size(0);
141 |   const int image_width = px_triangle_ids.size(1);
142 |   float unnormalized_matrix_inverse[9];
143 |
144 |   auto df_dvertices = torch::zeros(
145 |       {vertex_count, 4},
146 |       torch::dtype(torch::kFloat32));
147 |
148 |   auto df_dbarycentric_coords_a = df_dbarycentric_coords.accessor<float, 3>();
149 |   auto vertices_a = vertices.accessor<float, 2>();
150 |   auto triangles_a = triangles.accessor<int, 2>();
151 |   auto px_triangle_ids_a = px_triangle_ids.accessor<int, 2>();
152 |   auto px_barycentric_coords_a =
153 |       px_barycentric_coords.accessor<float, 3>();
154 |   auto df_dvertices_a = df_dvertices.accessor<float, 2>();
155 |
156 |   for (int iy = 0; iy < image_height; ++iy) {
157 |     for (int ix = 0; ix < image_width; ++ix) {
158 |       int triangle_id = px_triangle_ids_a[iy][ix];
159 |       const float b0 = px_barycentric_coords_a[iy][ix][0];
160 |       const float b1 = px_barycentric_coords_a[iy][ix][1];
161 |       const float b2 = px_barycentric_coords_a[iy][ix][2];
162 |       if (triangle_id == 0 && b0 + b1 + b2 < kDegenerateBarycentricCoordinatesCutoff) {
163 |         continue;
164 |       }
165 |
166 |       const int v0_id = triangles_a[triangle_id][0];
167 |       const int v1_id = triangles_a[triangle_id][1];
168 |       const int v2_id = triangles_a[triangle_id][2];
169 |
170 |       const float v0x = vertices_a[v0_id][0];
171 |       const float v0y = vertices_a[v0_id][1];
172 |       const float v0w = vertices_a[v0_id][3];
173 |       const float v1x = vertices_a[v1_id][0];
174 |       const float v1y = vertices_a[v1_id][1];
175 |       const float v1w = vertices_a[v1_id][3];
176 |       const float v2x = vertices_a[v2_id][0];
177 |       const float v2y = vertices_a[v2_id][1];
178 |       const float v2w = vertices_a[v2_id][3];
179 |
180 |       const float abs_det = std::abs(
181 |           compute_unnormalized_matrix_inverse(
182 |               v0x, v1x, v2x,
183 |               v0y, v1y, v2y,
184 |               v0w, v1w, v2w,
185 |               unnormalized_matrix_inverse));
186 |
187 |       const float m_inv_d_dx = (
188 |           unnormalized_matrix_inverse[0] +
189 |           unnormalized_matrix_inverse[3] +
190 |           unnormalized_matrix_inverse[6]);
191 |       const float m_inv_d_dy = (
192 |           unnormalized_matrix_inverse[1] +
193 |           unnormalized_matrix_inverse[4] +
194 |           unnormalized_matrix_inverse[7]);
195 |       const float m_inv_d_dw = (
196 |           unnormalized_matrix_inverse[2] +
197 |           unnormalized_matrix_inverse[5] +
198 |           unnormalized_matrix_inverse[8]);
199 |
200 |       // All of the below derivatives need to be normalized by abs_det.
201 | 202 | const float db0_dx0 = (-unnormalized_matrix_inverse[0]) * b0 + m_inv_d_dx * b0 * b0; 203 | const float db0_dx1 = (-unnormalized_matrix_inverse[0]) * b1 + m_inv_d_dx * b0 * b1; 204 | const float db0_dx2 = (-unnormalized_matrix_inverse[0]) * b2 + m_inv_d_dx * b0 * b2; 205 | const float db0_dy0 = (-unnormalized_matrix_inverse[1]) * b0 + m_inv_d_dy * b0 * b0; 206 | const float db0_dy1 = (-unnormalized_matrix_inverse[1]) * b1 + m_inv_d_dy * b0 * b1; 207 | const float db0_dy2 = (-unnormalized_matrix_inverse[1]) * b2 + m_inv_d_dy * b0 * b2; 208 | const float db0_dw0 = (-unnormalized_matrix_inverse[2]) * b0 + m_inv_d_dw * b0 * b0; 209 | const float db0_dw1 = (-unnormalized_matrix_inverse[2]) * b1 + m_inv_d_dw * b0 * b1; 210 | const float db0_dw2 = (-unnormalized_matrix_inverse[2]) * b2 + m_inv_d_dw * b0 * b2; 211 | 212 | const float db1_dx0 = (-unnormalized_matrix_inverse[3]) * b0 + m_inv_d_dx * b1 * b0; 213 | const float db1_dx1 = (-unnormalized_matrix_inverse[3]) * b1 + m_inv_d_dx * b1 * b1; 214 | const float db1_dx2 = (-unnormalized_matrix_inverse[3]) * b2 + m_inv_d_dx * b1 * b2; 215 | const float db1_dy0 = (-unnormalized_matrix_inverse[4]) * b0 + m_inv_d_dy * b1 * b0; 216 | const float db1_dy1 = (-unnormalized_matrix_inverse[4]) * b1 + m_inv_d_dy * b1 * b1; 217 | const float db1_dy2 = (-unnormalized_matrix_inverse[4]) * b2 + m_inv_d_dy * b1 * b2; 218 | const float db1_dw0 = (-unnormalized_matrix_inverse[5]) * b0 + m_inv_d_dw * b1 * b0; 219 | const float db1_dw1 = (-unnormalized_matrix_inverse[5]) * b1 + m_inv_d_dw * b1 * b1; 220 | const float db1_dw2 = (-unnormalized_matrix_inverse[5]) * b2 + m_inv_d_dw * b1 * b2; 221 | 222 | const float db2_dx0 = (-unnormalized_matrix_inverse[6]) * b0 + m_inv_d_dx * b2 * b0; 223 | const float db2_dx1 = (-unnormalized_matrix_inverse[6]) * b1 + m_inv_d_dx * b2 * b1; 224 | const float db2_dx2 = (-unnormalized_matrix_inverse[6]) * b2 + m_inv_d_dx * b2 * b2; 225 | const float db2_dy0 = (-unnormalized_matrix_inverse[7]) * b0 + m_inv_d_dy * b2 * b0; 226 | const float db2_dy1 = (-unnormalized_matrix_inverse[7]) * b1 + m_inv_d_dy * b2 * b1; 227 | const float db2_dy2 = (-unnormalized_matrix_inverse[7]) * b2 + m_inv_d_dy * b2 * b2; 228 | const float db2_dw0 = (-unnormalized_matrix_inverse[8]) * b0 + m_inv_d_dw * b2 * b0; 229 | const float db2_dw1 = (-unnormalized_matrix_inverse[8]) * b1 + m_inv_d_dw * b2 * b1; 230 | const float db2_dw2 = (-unnormalized_matrix_inverse[8]) * b2 + m_inv_d_dw * b2 * b2; 231 | 232 | df_dvertices_a[v0_id][0] += ( 233 | df_dbarycentric_coords_a[iy][ix][0] * db0_dx0 + 234 | df_dbarycentric_coords_a[iy][ix][1] * db1_dx0 + 235 | df_dbarycentric_coords_a[iy][ix][2] * db2_dx0) / abs_det; 236 | df_dvertices_a[v0_id][1] += ( 237 | df_dbarycentric_coords_a[iy][ix][0] * db0_dy0 + 238 | df_dbarycentric_coords_a[iy][ix][1] * db1_dy0 + 239 | df_dbarycentric_coords_a[iy][ix][2] * db2_dy0) / abs_det; 240 | df_dvertices_a[v0_id][3] += ( 241 | df_dbarycentric_coords_a[iy][ix][0] * db0_dw0 + 242 | df_dbarycentric_coords_a[iy][ix][1] * db1_dw0 + 243 | df_dbarycentric_coords_a[iy][ix][2] * db2_dw0) / abs_det; 244 | 245 | df_dvertices_a[v1_id][0] += ( 246 | df_dbarycentric_coords_a[iy][ix][0] * db0_dx1 + 247 | df_dbarycentric_coords_a[iy][ix][1] * db1_dx1 + 248 | df_dbarycentric_coords_a[iy][ix][2] * db2_dx1) / abs_det; 249 | df_dvertices_a[v1_id][1] += ( 250 | df_dbarycentric_coords_a[iy][ix][0] * db0_dy1 + 251 | df_dbarycentric_coords_a[iy][ix][1] * db1_dy1 + 252 | df_dbarycentric_coords_a[iy][ix][2] * db2_dy1) / abs_det; 253 | 
df_dvertices_a[v1_id][3] += (
254 |           df_dbarycentric_coords_a[iy][ix][0] * db0_dw1 +
255 |           df_dbarycentric_coords_a[iy][ix][1] * db1_dw1 +
256 |           df_dbarycentric_coords_a[iy][ix][2] * db2_dw1) / abs_det;
257 |
258 |       df_dvertices_a[v2_id][0] += (
259 |           df_dbarycentric_coords_a[iy][ix][0] * db0_dx2 +
260 |           df_dbarycentric_coords_a[iy][ix][1] * db1_dx2 +
261 |           df_dbarycentric_coords_a[iy][ix][2] * db2_dx2) / abs_det;
262 |       df_dvertices_a[v2_id][1] += (
263 |           df_dbarycentric_coords_a[iy][ix][0] * db0_dy2 +
264 |           df_dbarycentric_coords_a[iy][ix][1] * db1_dy2 +
265 |           df_dbarycentric_coords_a[iy][ix][2] * db2_dy2) / abs_det;
266 |       df_dvertices_a[v2_id][3] += (
267 |           df_dbarycentric_coords_a[iy][ix][0] * db0_dw2 +
268 |           df_dbarycentric_coords_a[iy][ix][1] * db1_dw2 +
269 |           df_dbarycentric_coords_a[iy][ix][2] * db2_dw2) / abs_det;
270 |     }
271 |   }
272 |   return { df_dvertices };
273 | }
274 |
275 | // Compute the triangle id, barycentric coordinates, and z-buffer at each pixel
276 | // in the image.
277 | //
278 | // Params:
279 | // vertices: A 2D float32 tensor with shape {vertex_count, 4}.
280 | //   Each quadruplet is the XYZW location of the vertex with that
281 | //   quadruplet's id. The coordinates are assumed to be OpenGL-style clip-space
282 | //   (i.e., post-projection, pre-divide), where X points right, Y points up,
283 | //   Z points away. Note Z here is the clip-space (z-buffer) depth and W is the
284 | //   world space depth.
285 | // triangles: A 2D int32 tensor with shape {triangle_count, 3}.
286 | //   Each triplet is the three vertex ids indexing into vertices
287 | //   describing one triangle with clockwise winding.
288 | //
289 | // Returns:
290 | // px_triangle_ids: A 2D tensor with shape {image_height, image_width}.
291 | //   At return, each pixel contains a triangle id in the range
292 | //   [0, triangle_count). The id value is also 0 if there is no triangle
293 | //   at the pixel. The px_barycentric_coordinates must be checked to distinguish
294 | //   between the two cases.
295 | // px_barycentric_coordinates: A 3D tensor with
296 | //   shape {image_height, image_width, 3}. At return, contains the triplet of
297 | //   barycentric coordinates at each pixel in the same vertex ordering as
298 | //   triangles. If no triangle is present, all coordinates are 0.
299 | // z_buffer: A 2D tensor with shape {image_height, image_width}. At
300 | //   return, contains the normalized device Z coordinates of the rendered
301 | //   triangles.
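// The strategy here: rasterize each triangle independently. For each triangle
// we build the unnormalized inverse of its clip-space vertex matrix once,
// scan only the pixels in its screen-space bounding box, and keep the nearest
// fragment per pixel via the z-buffer.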
302 | std::vector<torch::Tensor> rasterize_triangles_forward(
303 |     const torch::Tensor &vertices,
304 |     const torch::Tensor &triangles,
305 |     int image_width,
306 |     int image_height
307 | ) {
308 |   const int triangle_count = (int) triangles.size(0);
309 |   const float half_image_width = 0.5 * image_width;
310 |   const float half_image_height = 0.5 * image_height;
311 |   float unnormalized_matrix_inverse[9];
312 |   float b_over_w[3];
313 |   auto px_triangle_ids = torch::zeros(
314 |       {image_height, image_width},
315 |       torch::dtype(torch::kInt32));
316 |   auto px_barycentric_coords = torch::zeros(
317 |       {image_height, image_width, 3},
318 |       torch::dtype(torch::kFloat32).requires_grad(true));
319 |   auto z_buffer = torch::ones(
320 |       {image_height, image_width},
321 |       torch::dtype(torch::kFloat32));
322 |
323 |   auto vertices_a = vertices.accessor<float, 2>();
324 |   auto triangles_a = triangles.accessor<int, 2>();
325 |   auto z_buffer_a = z_buffer.accessor<float, 2>();
326 |   auto px_triangle_ids_a = px_triangle_ids.accessor<int, 2>();
327 |   auto px_barycentric_coords_a =
328 |       px_barycentric_coords.accessor<float, 3>();
329 |
330 |   for (int triangle_id = 0; triangle_id < triangle_count; ++triangle_id) {
331 |     const int v0_id = triangles_a[triangle_id][0];
332 |     const int v1_id = triangles_a[triangle_id][1];
333 |     const int v2_id = triangles_a[triangle_id][2];
334 |
335 |     const float v0w = vertices_a[v0_id][3];
336 |     const float v1w = vertices_a[v1_id][3];
337 |     const float v2w = vertices_a[v2_id][3];
338 |     // Early exit: if all w < 0, triangle is entirely behind the eye.
339 |     if (v0w < 0 && v1w < 0 && v2w < 0) {
340 |       continue;
341 |     }
342 |
343 |     const float v0x = vertices_a[v0_id][0];
344 |     const float v0y = vertices_a[v0_id][1];
345 |     const float v1x = vertices_a[v1_id][0];
346 |     const float v1y = vertices_a[v1_id][1];
347 |     const float v2x = vertices_a[v2_id][0];
348 |     const float v2y = vertices_a[v2_id][1];
349 |
350 |     compute_unnormalized_matrix_inverse(v0x, v1x, v2x,
351 |                                         v0y, v1y, v2y,
352 |                                         v0w, v1w, v2w,
353 |                                         unnormalized_matrix_inverse);
354 |
355 |     // Initialize the bounding box to the entire screen.
356 |     int left = 0, right = image_width, bottom = 0, top = image_height;
357 |     // If the triangle is entirely in front of the eye (all w > 0), project the
358 |     // vertices to pixel coordinates and find the triangle bounding box,
359 |     // enlarged to the nearest integer and clamped to the image boundaries.
360 |     if (v0w > 0 && v1w > 0 && v2w > 0) {
361 |       const float p0x = (v0x / v0w + 1.0) * half_image_width;
362 |       const float p1x = (v1x / v1w + 1.0) * half_image_width;
363 |       const float p2x = (v2x / v2w + 1.0) * half_image_width;
364 |       const float p0y = (v0y / v0w + 1.0) * half_image_height;
365 |       const float p1y = (v1y / v1w + 1.0) * half_image_height;
366 |       const float p2y = (v2y / v2w + 1.0) * half_image_height;
367 |       left = clamped_integer_min(p0x, p1x, p2x, 0, image_width);
368 |       right = clamped_integer_max(p0x, p1x, p2x, 0, image_width);
369 |       bottom = clamped_integer_min(p0y, p1y, p2y, 0, image_height);
370 |       top = clamped_integer_max(p0y, p1y, p2y, 0, image_height);
371 |     }
372 |
373 |     // Iterate over each pixel in the bounding box.
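    // Pixel centers sit at half-integer pixel coordinates; each is mapped back
    // to [-1, 1] NDC before the edge functions are evaluated.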
374 | for (int iy = bottom; iy < top; ++iy) { 375 | for (int ix = left; ix < right; ++ix) { 376 | const float px = ((ix + 0.5) / half_image_width) - 1.0; 377 | const float py = ((iy + 0.5) / half_image_height) - 1.0; 378 | 379 | compute_edge_functions(px, py, unnormalized_matrix_inverse, b_over_w); 380 | if (!pixel_is_inside_triangle(b_over_w)) { 381 | continue; 382 | } 383 | 384 | const float one_over_w = b_over_w[0] + b_over_w[1] + b_over_w[2]; 385 | const float b0 = b_over_w[0] / one_over_w; 386 | const float b1 = b_over_w[1] / one_over_w; 387 | const float b2 = b_over_w[2] / one_over_w; 388 | 389 | const float v0z = vertices_a[v0_id][2]; 390 | const float v1z = vertices_a[v1_id][2]; 391 | const float v2z = vertices_a[v2_id][2]; 392 | // Since we computed an unnormalized w above, we need to recompute 393 | // a properly scaled clip-space w value and then divide clip-space z 394 | // by that. 395 | const float clip_z = b0 * v0z + b1 * v1z + b2 * v2z; 396 | const float clip_w = b0 * v0w + b1 * v1w + b2 * v2w; 397 | const float z = clip_z / clip_w; 398 | 399 | // Skip the pixel if it is farther than the current z-buffer pixel or 400 | // beyond the near or far clipping plane. 401 | if (z < -1.0 || z > 1.0 || z > z_buffer_a[iy][ix]) { 402 | continue; 403 | } 404 | 405 | px_triangle_ids_a[iy][ix] = triangle_id; 406 | z_buffer_a[iy][ix] = z; 407 | px_barycentric_coords_a[iy][ix][0] = b0; 408 | px_barycentric_coords_a[iy][ix][1] = b1; 409 | px_barycentric_coords_a[iy][ix][2] = b2; 410 | } 411 | } 412 | } 413 | 414 | return { 415 | px_triangle_ids, 416 | px_barycentric_coords, 417 | z_buffer 418 | }; 419 | } 420 | 421 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 422 | m.def("forward", &rasterize_triangles_forward, "Rasterize forward"); 423 | m.def("backward", &rasterize_triangles_backward, "Rasterize backward"); 424 | } 425 | -------------------------------------------------------------------------------- /src/mesh_renderer/kernels/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils import cpp_extension 3 | 4 | setup(name="rasterize_triangles_cpp", 5 | ext_modules=[ 6 | cpp_extension.CppExtension( 7 | "rasterize_triangles_cpp", ["rasterize_triangles.cpp"]), 8 | ], 9 | cmdclass={"build_ext": cpp_extension.BuildExtension}) 10 | -------------------------------------------------------------------------------- /src/mesh_renderer/mesh_renderer_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | 4 | import numpy as np 5 | import torch 6 | 7 | from ..common import camera_utils 8 | from .. import mesh_renderer 9 | from . 
import test_utils 10 | 11 | class RenderTest(unittest.TestCase): 12 | def setUp(self): 13 | self.test_data_directory = os.path.join( 14 | os.path.dirname(os.path.abspath(__file__)), 15 | 'test_data' 16 | ) 17 | 18 | # Set up a basic cube centered at the origin, with vertex normals pointing 19 | # outwards along the line from the origin to the cube vertices: 20 | self.cube_vertices = torch.tensor( 21 | [[-1, -1, 1], [-1, -1, -1], [-1, 1, -1], [-1, 1, 1], [1, -1, 1], 22 | [1, -1, -1], [1, 1, -1], [1, 1, 1]], 23 | dtype=torch.float32) 24 | self.cube_normals = torch.nn.functional.normalize(self.cube_vertices, dim=1, p=2) 25 | self.cube_triangles = torch.tensor( 26 | [[0, 1, 2], [2, 3, 0], [3, 2, 6], [6, 7, 3], [7, 6, 5], [5, 4, 7], 27 | [4, 5, 1], [1, 0, 4], [5, 6, 2], [2, 1, 5], [7, 4, 0], [0, 3, 7]], 28 | dtype=torch.int32) 29 | 30 | def testRendersSimpleCube(self): 31 | """Renders a simple cube to test the full forward pass. 32 | 33 | Verifies the functionality of both the custom kernel and the python wrapper. 34 | """ 35 | 36 | model_transforms = camera_utils.euler_matrices( 37 | torch.tensor([[-20.0, 0.0, 60.0], [45.0, 60.0, 0.0]]))[:, :3, :3] 38 | 39 | vertices_world_space = torch.matmul( 40 | torch.stack([self.cube_vertices, self.cube_vertices]), 41 | model_transforms.transpose(1, 2) 42 | ) 43 | 44 | normals_world_space = torch.matmul( 45 | torch.stack([self.cube_normals, self.cube_normals]), 46 | model_transforms.transpose(1, 2) 47 | ) 48 | 49 | # camera position: 50 | eye = torch.tensor(2 * [[0.0, 0.0, 6.0]], dtype=torch.float32) 51 | center = torch.tensor(2 * [[0.0, 0.0, 0.0]], dtype=torch.float32) 52 | world_up = torch.tensor(2 * [[0.0, 1.0, 0.0]], dtype=torch.float32) 53 | image_width = 640 54 | image_height = 480 55 | light_positions = torch.tensor([[[0.0, 0.0, 6.0]], [[0.0, 0.0, 6.0]]]) 56 | light_intensities = torch.ones([2, 1, 3], dtype=torch.float32) 57 | vertex_diffuse_colors = torch.ones_like(vertices_world_space, dtype=torch.float32) 58 | 59 | images = mesh_renderer.render( 60 | vertices_world_space, self.cube_triangles, normals_world_space, 61 | vertex_diffuse_colors, eye, center, world_up, light_positions, 62 | light_intensities, image_width, image_height 63 | ) 64 | 65 | for image_id in range(images.shape[0]): 66 | target_image_name = 'Gray_Cube_%i.png' % image_id 67 | baseline_image_path = os.path.join(self.test_data_directory, 68 | target_image_name) 69 | test_utils.expect_image_file_and_render_are_near( 70 | self, baseline_image_path, images[image_id, :, :, :]) 71 | 72 | def testComplexShading(self): 73 | """Tests specular highlights, colors, and multiple lights per image.""" 74 | # rotate the cube for the test: 75 | model_transforms = camera_utils.euler_matrices( 76 | torch.tensor([[-20.0, 0.0, 60.0], [45.0, 60.0, 0.0]]))[:, :3, :3] 77 | 78 | vertices_world_space = torch.matmul( 79 | torch.stack([self.cube_vertices, self.cube_vertices]), 80 | model_transforms.transpose(1, 2) 81 | ) 82 | 83 | normals_world_space = torch.matmul( 84 | torch.stack([self.cube_normals, self.cube_normals]), 85 | model_transforms.transpose(1, 2) 86 | ) 87 | 88 | # camera position: 89 | eye = torch.tensor([[0.0, 0.0, 6.0], [0., 0.2, 18.0]], dtype=torch.float32) 90 | center = torch.tensor([[0.0, 0.0, 0.0], [0.1, -0.1, 0.1]], dtype=torch.float32) 91 | world_up = torch.tensor( 92 | [[0.0, 1.0, 0.0], [0.1, 1.0, 0.15]], dtype=torch.float32) 93 | fov_y = torch.tensor([40., 13.3], dtype=torch.float32) 94 | near_clip = torch.tensor(0.1, dtype=torch.float32) 95 | far_clip = torch.tensor(25.0, 
dtype=torch.float32)
96 |         image_width = 640
97 |         image_height = 480
98 |         light_positions = torch.tensor([[[0.0, 0.0, 6.0], [1.0, 2.0, 6.0]],
99 |                                         [[0.0, -2.0, 4.0], [1.0, 3.0, 4.0]]])
100 |         light_intensities = torch.tensor(
101 |             [[[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]], [[2.0, 0.0, 1.0], [0.0, 2.0,
102 |               1.0]]],
103 |             dtype=torch.float32)
104 |         vertex_diffuse_colors = torch.tensor(2*[[[1.0, 0.0, 0.0],
105 |                                                  [0.0, 1.0, 0.0],
106 |                                                  [0.0, 0.0, 1.0],
107 |                                                  [1.0, 1.0, 1.0],
108 |                                                  [1.0, 1.0, 0.0],
109 |                                                  [1.0, 0.0, 1.0],
110 |                                                  [0.0, 1.0, 1.0],
111 |                                                  [0.5, 0.5, 0.5]]],
112 |                                               dtype=torch.float32)
113 |         vertex_specular_colors = torch.tensor(2*[[[0.0, 1.0, 0.0],
114 |                                                   [0.0, 0.0, 1.0],
115 |                                                   [1.0, 1.0, 1.0],
116 |                                                   [1.0, 1.0, 0.0],
117 |                                                   [1.0, 0.0, 1.0],
118 |                                                   [0.0, 1.0, 1.0],
119 |                                                   [0.5, 0.5, 0.5],
120 |                                                   [1.0, 0.0, 0.0]]],
121 |                                                dtype=torch.float32)
122 |         shininess_coefficients = 6.0 * torch.ones([2, 8], dtype=torch.float32)
123 |         ambient_color = torch.tensor(
124 |             [[0., 0., 0.], [0.1, 0.1, 0.2]], dtype=torch.float32)
125 |         renders = mesh_renderer.render(
126 |             vertices_world_space, self.cube_triangles, normals_world_space,
127 |             vertex_diffuse_colors, eye, center, world_up, light_positions,
128 |             light_intensities, image_width, image_height, vertex_specular_colors,
129 |             shininess_coefficients, ambient_color, fov_y, near_clip, far_clip)
130 |         tonemapped_renders = torch.cat(
131 |             [
132 |                 mesh_renderer.tone_mapper(renders[:, :, :, 0:3], 0.7),
133 |                 renders[:, :, :, 3:4]
134 |             ],
135 |             dim=3)
136 |
137 |         # Check that shininess coefficient broadcasting works by also rendering
138 |         # with a scalar shininess coefficient, and ensuring the result is identical:
139 |         broadcasted_renders = mesh_renderer.render(
140 |             vertices_world_space, self.cube_triangles, normals_world_space,
141 |             vertex_diffuse_colors, eye, center, world_up, light_positions,
142 |             light_intensities, image_width, image_height, vertex_specular_colors,
143 |             6.0, ambient_color, fov_y, near_clip, far_clip)
144 |         tonemapped_broadcasted_renders = torch.cat(
145 |             [
146 |                 mesh_renderer.tone_mapper(broadcasted_renders[:, :, :, 0:3], 0.7),
147 |                 broadcasted_renders[:, :, :, 3:4]
148 |             ],
149 |             dim=3)
        self.assertTrue(torch.allclose(
            tonemapped_renders, tonemapped_broadcasted_renders))
150 |
151 |     def testFullRenderGradientComputation(self):
152 |         """Verifies the Jacobian matrix for the entire renderer.
153 |
154 |         This ensures correct gradients are propagated backwards through the entire
155 |         process, not just through the rasterization kernel. Uses the simple cube
156 |         forward pass.
157 |         """
158 |         image_height = 21
159 |         image_width = 28
160 |
161 |         def render_cube_vertices(cube_vertices):
162 |             # rotate the cube for the test:
163 |             model_transforms = camera_utils.euler_matrices(
164 |                 torch.tensor([[-20.0, 0.0, 60.0], [45.0, 60.0, 0.0]]))[:, :3, :3]
165 |
166 |             vertices_world_space = torch.matmul(
167 |                 torch.stack([cube_vertices, cube_vertices]),
168 |                 model_transforms.transpose(1, 2))
169 |
170 |             normals_world_space = torch.matmul(
171 |                 torch.stack([self.cube_normals, self.cube_normals]),
172 |                 model_transforms.transpose(1, 2))
173 |
174 |             # camera position:
175 |             eye = torch.tensor([0.0, 0.0, 6.0], dtype=torch.float32)
176 |             center = torch.tensor([0.0, 0.0, 0.0], dtype=torch.float32)
177 |             world_up = torch.tensor([0.0, 1.0, 0.0], dtype=torch.float32)
178 |
179 |             # Scene has a single light from the viewer's eye.
180 | light_positions = torch.unsqueeze(torch.stack([eye, eye], dim=0), dim=1) 181 | light_intensities = torch.ones([2, 1, 3], dtype=torch.float32) 182 | 183 | vertex_diffuse_colors = torch.ones_like(vertices_world_space, dtype=torch.float32) 184 | 185 | rendered = mesh_renderer.render( 186 | vertices_world_space, self.cube_triangles, normals_world_space, 187 | vertex_diffuse_colors, eye, center, world_up, light_positions, 188 | light_intensities, image_width, image_height) 189 | return rendered 190 | 191 | test_cube_vertices = torch.tensor(self.cube_vertices, requires_grad=True) 192 | analytical = test_utils.get_analytical_jacobian( 193 | test_cube_vertices, render_cube_vertices(test_cube_vertices)) 194 | numerical = test_utils.get_numerical_jacobian( 195 | render_cube_vertices, test_cube_vertices, eps=1e-3) 196 | jacobians_match = ( 197 | test_utils.check_jacobians_are_nearly_equal( 198 | analytical, numerical, 0.01, 0.01)) 199 | self.assertTrue( 200 | jacobians_match, 201 | "Analytical and numerical jacobians have too many relative or " 202 | "absolute outliers") 203 | 204 | def testThatCubeRotates(self): 205 | """Optimize a simple cube's rotation using pixel loss. 206 | 207 | The rotation is represented as static-basis euler angles. This test checks 208 | that the computed gradients are useful. 209 | """ 210 | image_height = 480 211 | image_width = 640 212 | initial_euler_angles = [[0.0, 0.0, 0.0]] 213 | euler_angles = torch.tensor(initial_euler_angles, requires_grad=True) 214 | 215 | def render_cube_with_rotation(input_euler_angles): 216 | model_rotation = camera_utils.euler_matrices(input_euler_angles)[0, :3, :3] # [3, 3] 217 | 218 | vertices_world_space = torch.reshape( 219 | torch.matmul(self.cube_vertices, model_rotation.T), 220 | [1, 8, 3]) 221 | 222 | normals_world_space = torch.reshape( 223 | torch.matmul(self.cube_normals, model_rotation.T), 224 | [1, 8, 3]) 225 | 226 | # camera position: 227 | eye = torch.tensor([[0.0, 0.0, 6.0]], dtype=torch.float32) 228 | center = torch.tensor([[0.0, 0.0, 0.0]], dtype=torch.float32) 229 | world_up = torch.tensor([[0.0, 1.0, 0.0]], dtype=torch.float32) 230 | 231 | vertex_diffuse_colors = torch.ones_like(vertices_world_space, dtype=torch.float32) 232 | light_positions = torch.reshape(eye, [1, 1, 3]) 233 | light_intensities = torch.ones([1, 1, 3], dtype=torch.float32) 234 | 235 | render = mesh_renderer.render( 236 | vertices_world_space, self.cube_triangles, normals_world_space, 237 | vertex_diffuse_colors, eye, center, world_up, light_positions, 238 | light_intensities, image_width, image_height) 239 | render = torch.reshape(render, [image_height, image_width, 4]) 240 | return render 241 | 242 | # Pick the desired cube rotation for the test: 243 | target_euler_angles = torch.tensor([[-20.0, 0.0, 60.0]]) 244 | desired_render = render_cube_with_rotation(target_euler_angles) 245 | 246 | optimizer = torch.optim.SGD([euler_angles], 0.7, 0.1) 247 | def stepfn(): 248 | optimizer.zero_grad() 249 | render = render_cube_with_rotation(euler_angles) 250 | loss = torch.mean(torch.abs(render - desired_render)) 251 | loss.backward() 252 | torch.nn.utils.clip_grad_norm_([euler_angles], 1.0) 253 | return loss 254 | 255 | for _ in range(35): 256 | optimizer.step(stepfn) 257 | 258 | final_render = render_cube_with_rotation(euler_angles) 259 | desired_render = render_cube_with_rotation(target_euler_angles) # sanity check re-rendering target angles is the same 260 | 261 | target_image_name = 'Gray_Cube_0.png' 262 | baseline_image_path = 
os.path.join(self.test_data_directory, 263 | target_image_name) 264 | test_utils.expect_image_file_and_render_are_near( 265 | self, baseline_image_path, desired_render) 266 | test_utils.expect_image_file_and_render_are_near( 267 | self, 268 | baseline_image_path, 269 | final_render.detach(), 270 | max_outlier_fraction=0.01, 271 | pixel_error_threshold=0.04) 272 | 273 | 274 | if __name__ == "__main__": 275 | unittest.main() -------------------------------------------------------------------------------- /src/mesh_renderer/rasterize.py: -------------------------------------------------------------------------------- 1 | """ 2 | Differentiable triangle rasterizer using Genova 2018 un-clipped 3 | barycentric formulation. 4 | """ 5 | 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import torch 11 | 12 | from ..common import camera_utils 13 | 14 | USE_CPP_RASTERIZER = False 15 | def rasterize_barycentric(clip_space_vertices, triangles, image_width, image_height): 16 | if USE_CPP_RASTERIZER: 17 | from . import rasterize_triangles_ext 18 | return rasterize_triangles_ext.BarycentricRasterizer.apply( 19 | clip_space_vertices, triangles, image_width, image_height 20 | ) 21 | else: 22 | from . import rasterize_triangles_python 23 | return rasterize_triangles_python.rasterize_barycentric( 24 | clip_space_vertices, triangles, image_width, image_height 25 | ) 26 | 27 | def rasterize(world_space_vertices, attributes, triangles, 28 | camera_matrices, image_width, image_height, background_value): 29 | """Rasterize a mesh and compute interpolated vertex attributes. 30 | 31 | Applies projection matrices and then calls rasterize_clip_space(). 32 | 33 | Args: 34 | world_space_vertices: 3D float32 tensor of xyz positions with shape 35 | [batch_size, vertex_count, 3]. 36 | attributes: 3D float32 tensor with shape [batch_size, vertex_count, 37 | attribute_count]. Each vertex attribute is interpolated across 38 | the triangle using barycentric interpolation. 39 | triangles: 2D int32 tensor with shape [triangle_count, 3]. Each triplet 40 | should contain vertex indices describing a triangle such that the 41 | triangle's normal points toward the viewer if the forward order of 42 | the triplet defines a clockwise winding of the vertices. Gradients 43 | with respect to this tensor are not available. 44 | # TODO: make this use CCW ordering to be consistent with soft_mesh_renderer 45 | camera_matrices: 3D float tensor with shape [batch_size, 4, 4] containing 46 | model-view-perspective projection matrices. 47 | image_width: int specifying desired output image width in pixels. 48 | image_height: int specifying desired output image height in pixels. 49 | background_value: a 1D float32 tensor with shape [attribute_count]. 50 | Pixels that lie outside all triangles take this value. 51 | 52 | Returns: 53 | A 4D float32 tensor with shape [batch_size, image_height, image_width, 54 | attribute_count], containing the interpolated vertex attributes at each 55 | pixel. 56 | 57 | Raises: 58 | ValueError: An invalid argument to the method is detected. 
59 | """ 60 | clip_space_vertices = camera_utils.transform_homogeneous( 61 | camera_matrices, world_space_vertices) 62 | return rasterize_clip_space(clip_space_vertices, attributes, triangles, 63 | image_width, image_height, background_value) 64 | 65 | 66 | def rasterize_clip_space(clip_space_vertices, attributes, triangles, 67 | image_width, image_height, background_value): 68 | """Rasterize the input mesh expressed in clip-space (xyzw) coordinates. 69 | 70 | Interpolates vertex attributes using perspective-correct interpolation 71 | and clips triangles that lie outside the viewing frustum. 72 | 73 | Args: 74 | clip_space_vertices: 3D float32 tensor of homogeneous vertices (xyzw) 75 | with shape [batch_size, vertex_count, 4]. 76 | attributes: 3D float32 tensor with shape [batch_size, vertex_count, 77 | attribute_count]. Each vertex attribute is interpolated across the 78 | triangle using barycentric interpolation. 79 | triangles: 2D int32 tensor with shape [triangle_count, 3]. Each triplet 80 | should contain vertex indices describing a triangle such that the 81 | triangle's normal points toward the viewer if the forward order of 82 | the triplet defines a clockwise winding of the vertices. Gradients 83 | with respect to this tensor are not available. 84 | # TODO: make this use CCW ordering to be consistent with soft_mesh_renderer 85 | image_width: int specifying desired output image width in pixels. 86 | image_height: int specifying desired output image height in pixels. 87 | background_value: a 1D float32 tensor with shape [attribute_count]. 88 | Pixels that lie outside all triangles take this value. 89 | 90 | Returns: 91 | A 4D float32 tensor with shape [batch_size, image_height, image_width, 92 | attribute_count], containing the interpolated vertex attributes at each 93 | pixel. 94 | 95 | Raises: 96 | ValueError: An invalid argument to the method is detected. 97 | """ 98 | if not image_width > 0: 99 | raise ValueError("Image width must be > 0.") 100 | if not image_height > 0: 101 | raise ValueError("Image height must be > 0.") 102 | if len(clip_space_vertices.shape) != 3: 103 | raise ValueError("The vertex buffer must be 3D.") 104 | 105 | vertex_count = clip_space_vertices.shape[1] 106 | 107 | batch_size = clip_space_vertices.shape[0] 108 | 109 | per_image_barycentric_coordinates = [] 110 | per_image_vertex_ids = [] 111 | 112 | for b in range(batch_size): 113 | px_triangle_ids, px_barycentric_coords, _ = rasterize_barycentric( 114 | clip_space_vertices[b, :, :], triangles, image_width, image_height) 115 | per_image_barycentric_coordinates.append( 116 | torch.reshape(px_barycentric_coords, [-1, 3])) # [pixel_count, 3] 117 | 118 | vertex_ids = torch.index_select( 119 | triangles, 0, torch.reshape(px_triangle_ids, [-1]).long()) # [pixel_count, 3] 120 | reindexed_ids = vertex_ids + b * clip_space_vertices.shape[1] 121 | per_image_vertex_ids.append(reindexed_ids) 122 | 123 | barycentric_coordinates = torch.reshape( 124 | torch.stack(per_image_barycentric_coordinates, 0), [-1, 3]) 125 | vertex_ids = torch.reshape( 126 | torch.stack(per_image_vertex_ids, 0), [-1, 3]) 127 | 128 | # Indexes with each pixel's clip-space triangle's extrema (the pixel's 129 | # 'corner points') ids to get the relevant properties for deferred shading. 
130 | flattened_vertex_attributes = torch.reshape(attributes, 131 | [batch_size * vertex_count, -1]) 132 | corner_attributes = flattened_vertex_attributes[vertex_ids.long()] 133 | 134 | # Computes the pixel attributes by interpolating the known attributes at 135 | # the corner points of the triangle interpolated with the 136 | # barycentric coordinates. 137 | weighted_vertex_attributes = torch.mul(corner_attributes, 138 | torch.unsqueeze(barycentric_coordinates, 2)) 139 | summed_attributes = torch.sum(weighted_vertex_attributes, dim=1) 140 | attribute_images = torch.reshape(summed_attributes, 141 | [batch_size, image_height, image_width, -1]) 142 | 143 | # Barycentric coordinates should approximately sum to one where there is 144 | # rendered geometry, but be exactly zero where there is not. 145 | alphas = torch.clamp( 146 | torch.sum(2.0 * barycentric_coordinates, dim=1), 0.0, 1.0) 147 | alphas = torch.reshape(alphas, [batch_size, image_height, image_width, 1]) 148 | 149 | attributes_with_background = ( 150 | alphas * attribute_images + (1.0 - alphas) * background_value) 151 | 152 | return attributes_with_background 153 | -------------------------------------------------------------------------------- /src/mesh_renderer/rasterize_triangles_ext.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import rasterize_triangles_cpp 4 | 5 | 6 | class BarycentricRasterizer(torch.autograd.Function): 7 | @staticmethod 8 | def forward(ctx, clip_space_vertices, triangles, image_width, image_height): 9 | """Rasterize the input mesh expressed in clip-space (xyzw) coordinates. 10 | 11 | Interpolates barycentric coordinates using perspective-correct interpolation 12 | and clips triangles that lie outside the viewing frustum. 13 | 14 | Args: 15 | clip_space_vertices: 2D float32 tensor of homogeneous vertices (xyzw) 16 | with shape [vertex_count, 4]. 17 | triangles: 2D int32 tensor with shape [triangle_count, 3]. Each triplet 18 | should contain vertex indices describing a triangle such that the 19 | triangle's normal points toward the viewer if the forward order of 20 | the triplet defines a clockwise winding of the vertices. Gradients 21 | with respect to this tensor are not available. 22 | image_width: int specifying desired output image width in pixels. 23 | image_height: int specifying desired output image height in pixels. 24 | 25 | Returns: 26 | px_triangle_ids: A 2D tensor with shape [image_height, image_width]. 27 | At return, each pixel contains a triangle id in the range 28 | [0, triangle_count). The id value is also 0 if there is no triangle 29 | at the pixel. The px_barycentric_coordinates must be checked to distinguish 30 | between the two cases. 31 | px_barycentric_coordinates: A 3D tensor with 32 | shape [image_height, image_width, 3]. At return, contains the triplet of 33 | barycentric coordinates at each pixel in the same vertex ordering as 34 | triangles. If no triangle is present, all coordinates are 0. 35 | z_buffer: A 2D tensor with shape [image_height, image_width] elements. At 36 | return, contains the normalized device Z coordinates of the rendered 37 | triangles. 
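        Example (illustrative only; it assumes the rasterize_triangles_cpp
        extension has been built as described in the package README):

            px_ids, px_bc, z = BarycentricRasterizer.apply(
                clip_space_vertices, triangles, 640, 480)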
38 |         """
39 |         px_triangle_ids, px_barycentric_coords, z_buffer = rasterize_triangles_cpp.forward(
40 |             clip_space_vertices, triangles, image_width, image_height)
41 |         ctx.save_for_backward(clip_space_vertices, triangles,
42 |                               px_triangle_ids, px_barycentric_coords)
43 |         return px_triangle_ids, px_barycentric_coords, z_buffer
44 | 
45 |     @staticmethod
46 |     def backward(ctx, _, df_dbarycentric_coords, __):
47 |         """Get the gradient of a scalar loss function w.r.t. input vertices
48 |         expressed in clip-space (xyzw) coordinates.
49 |         In the backward pass we receive a Tensor containing the gradient of the
50 |         loss function w.r.t. our barycentric coordinate output and compute
51 |         the gradient of the loss w.r.t. each vertex.
52 | 
53 |         Gradients w.r.t. triangle_ids or image width or height are not available.
54 |         """
55 |         clip_space_vertices, triangles, px_triangle_ids, px_barycentric_coords = ctx.saved_tensors
56 |         output = rasterize_triangles_cpp.backward(
57 |             df_dbarycentric_coords,
58 |             clip_space_vertices,
59 |             triangles,
60 |             px_triangle_ids,
61 |             px_barycentric_coords)
62 |         df_dvertices, = output
63 |         return df_dvertices, torch.zeros_like(triangles), None, None
--------------------------------------------------------------------------------
/src/mesh_renderer/rasterize_triangles_python.py:
--------------------------------------------------------------------------------
1 | """
2 | rasterize_triangles_python.py
3 | 
4 | Implements BarycentricRasterizer with PyTorch-only primitives (no C++ extensions).
5 | """
6 | 
7 | from ..common import camera_utils
8 | import torch
9 | import math
10 | 
11 | # Returns a 4x4 viewport matrix which can be used to convert 3D homogeneous points in clip space to screen space,
12 | # specified by args:
13 | # - x: integer giving the screen space X offset
14 | # - y: integer giving the screen space Y offset
15 | # - w: integer giving the total screen space width
16 | # - h: integer giving the total screen space height
17 | # - z_buffer_res: number giving resolution of the z-buffer
18 | #
19 | # The bi-unit cube [-1, 1]*[-1, 1]*[-1, 1] should be mapped onto the screen cube [x, x+w]*[y, y+h]*[0, z_buffer_res].
20 | def viewport(x, y, w, h, z_buffer_res):
21 |     res = torch.eye(4)
22 |     res[0][0] = w/2.0
23 |     res[1][1] = h/2.0
24 |     res[2][2] = z_buffer_res/2.0
25 |     res[0:3, 3] = torch.tensor([x + w/2.0, y + h/2.0, z_buffer_res / 2.0])
26 |     return res
27 | 
28 | # Returns barycentric coordinates of a 3D point P w.r.t. triangle v0, v1, v2.
29 | # The input `M_inv` should be the inverse of a 3x3 matrix where the columns are the vertices.
30 | def barycentric(M_inv, p):
31 |     return M_inv @ p
32 | 
33 | def rasterize_barycentric(clip_space_vertices, triangles, image_width, image_height):
34 |     """Rasterize the input mesh expressed in clip-space (xyzw) coordinates.
35 | 
36 |     Interpolates barycentric coordinates using perspective-correct interpolation
37 |     and clips triangles that lie outside the viewing frustum.
38 | 
39 |     Args:
40 |         clip_space_vertices: 2D float32 tensor of homogeneous vertices (xyzw)
41 |             with shape [vertex_count, 4].
42 |         triangles: 2D int32 tensor with shape [triangle_count, 3]. Each triplet
43 |             should contain vertex indices describing a triangle such that the
44 |             triangle's normal points toward the viewer if the forward order of
45 |             the triplet defines a clockwise winding of the vertices. Gradients
46 |             with respect to this tensor are not available.
47 |         image_width: int specifying desired output image width in pixels.
48 | image_height: int specifying desired output image height in pixels. 49 | 50 | Returns: 51 | px_triangle_ids: A 2D tensor with shape [image_height, image_width]. 52 | At return, each pixel contains a triangle id in the range 53 | [0, triangle_count). The id value is also 0 if there is no triangle 54 | at the pixel. The px_barycentric_coordinates must be checked to distinguish 55 | between the two cases. 56 | px_barycentric_coordinates: A 3D tensor with 57 | shape [image_height, image_width, 3]. At return, contains the triplet of 58 | barycentric coordinates at each pixel in the same vertex ordering as 59 | triangles. If no triangle is present, all coordinates are 0. 60 | z_buffer: A 2D tensor with shape [image_height, image_width] elements. At 61 | return, contains the normalized device Z coordinates of the rendered 62 | triangles. 63 | """ 64 | z_buffer = torch.ones([image_height, image_width], dtype=torch.float32) 65 | px_triangle_ids = torch.zeros([image_height, image_width], dtype=torch.int32) 66 | px_barycentric_coordinates = torch.zeros([image_height, image_width, 3], dtype=torch.float32) 67 | 68 | # z-buffer ranges from 0.0 to 1.0, anything outside gets clipped 69 | z_buffer_res = 1.0 70 | viewport_mat = viewport(0., 0., image_width, image_height, z_buffer_res) 71 | px_M = torch.zeros(3, 3) 72 | 73 | for triangle_id in range(len(triangles)): 74 | triangle = triangles[triangle_id] 75 | proj_v012 = clip_space_vertices[triangle] # shape: [3, 4] 76 | proj_v012_w = proj_v012[:,[3]] # shape: [3, 1] 77 | 78 | # clip space to screen space 79 | px_v012 = (viewport_mat @ (proj_v012 / (proj_v012_w)).T).T[:,:3] 80 | 81 | # get bbox in screen-space 82 | minx = math.floor( 83 | max(0, min(px_v012[0][0], px_v012[1][0], px_v012[2][0], image_width)) 84 | ) 85 | miny = math.floor( 86 | max(0, min(px_v012[0][1], px_v012[1][1], px_v012[2][1], image_height)) 87 | ) 88 | maxx = math.ceil( 89 | min(image_width, max(px_v012[0][0], px_v012[1][0], px_v012[2][0], 0)) 90 | ) 91 | maxy = math.ceil( 92 | min(image_height, max(px_v012[0][1], px_v012[1][1], px_v012[2][1], 0)) 93 | ) 94 | 95 | px_M[:] = px_v012.T 96 | px_M[2,:] = torch.tensor([1., 1., 1.]) 97 | try: 98 | px_M_inv = px_M.inverse() 99 | except Exception: 100 | # Screen-space vertex basis is not invertible, meaning triangle is 101 | # degenerate when projected (zero area). Skip rendering 102 | continue 103 | 104 | did_draw = 0 105 | # Depths of the screen-space vertices as suitable for z-test. 106 | # Note that depth is inversely proportional to the vertex eye-space z-coordinate. 107 | vertex_depths = px_v012[:,2] 108 | 109 | for y in range(miny, maxy): 110 | if y<0 or y>=image_height: 111 | continue 112 | for x in range(minx, maxx): 113 | if x<0 or x>=image_width: 114 | continue 115 | p = torch.tensor([x + 0.5, y + 0.5, 1.]) 116 | bc_screen = barycentric(px_M_inv, p) 117 | if bc_screen[0] < 0 or bc_screen[1] < 0 or bc_screen[2] < 0: 118 | # pixel is not inside triangle 119 | continue 120 | else: 121 | # get perspective-correct barycentric coordinates 122 | bc = torch.nn.functional.normalize(bc_screen / proj_v012_w.T[0], dim=0, p=1) 123 | # TODO: shouldn't this be perspective-corrected z? bug? 
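                    # Note on the TODO above: interpolating NDC depth with
                    # screen-space barycentrics appears to be the standard
                    # z-buffer behavior, since for a planar triangle NDC z is
                    # an affine function of screen x/y. Using the
                    # perspective-corrected `bc` instead (z = vertex_depths @ bc)
                    # would interpolate depth linearly in eye space, which
                    # gives a different depth distribution across the triangle.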
124 | z = vertex_depths @ bc_screen 125 | if z < 0.0 or z > 1.0 or z > z_buffer[y][x]: 126 | continue 127 | did_draw += 1 128 | z_buffer[y][x] = z 129 | px_triangle_ids[y][x] = triangle_id 130 | px_barycentric_coordinates[y][x] = bc 131 | print("drew {} pixels for triangle {}".format(did_draw, triangle_id)) 132 | 133 | return px_triangle_ids, px_barycentric_coordinates, z_buffer 134 | -------------------------------------------------------------------------------- /src/mesh_renderer/rasterize_triangles_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os 6 | import unittest 7 | 8 | import numpy as np 9 | import torch 10 | 11 | from ..common import camera_utils 12 | from .rasterize import rasterize, rasterize_barycentric 13 | from . import test_utils 14 | 15 | 16 | class RenderTest(unittest.TestCase): 17 | def setUp(self): 18 | self.test_data_directory = os.path.join(os.path.dirname(os.path.realpath(__file__)), "./test_data/") 19 | 20 | self.cube_vertex_positions = torch.tensor( 21 | [[-1, -1, 1], [-1, -1, -1], [-1, 1, -1], [-1, 1, 1], [1, -1, 1], 22 | [1, -1, -1], [1, 1, -1], [1, 1, 1]], 23 | dtype=torch.float32) 24 | self.cube_triangles = torch.tensor( 25 | [[0, 1, 2], [2, 3, 0], [3, 2, 6], [6, 7, 3], [7, 6, 5], [5, 4, 7], 26 | [4, 5, 1], [1, 0, 4], [5, 6, 2], [2, 1, 5], [7, 4, 0], [0, 3, 7]], 27 | dtype=torch.int32) 28 | 29 | self.image_width = 640 30 | self.image_height = 480 31 | 32 | self.perspective = camera_utils.perspective( 33 | self.image_width / self.image_height, 34 | torch.tensor([40.0]), torch.tensor([0.01]), 35 | torch.tensor([10.0])) 36 | 37 | def runTriangleTest(self, w_vector, target_image_name): 38 | """Directly renders a rasterized triangle's barycentric coordinates. 39 | 40 | Tests only the kernel (rasterize_triangles_module). 41 | 42 | Args: 43 | w_vector: 3-vector of w components to scale triangle vertices. 44 | target_image_name: image file name to compare result against. 45 | """ 46 | clip_init = np.array( 47 | [ 48 | [-0.5, -0.5, 0.8, 1.0], 49 | [0.0, 0.5, 0.3, 1.0], 50 | [0.5, -0.5, 0.3, 1.0] 51 | ], dtype=np.float32) 52 | clip_init = clip_init * np.reshape( 53 | np.array(w_vector, dtype=np.float32), [3, 1]) 54 | 55 | clip_coordinates = torch.tensor(clip_init) 56 | triangles = torch.tensor([[0, 1, 2]], dtype=torch.int32) 57 | 58 | _, barycentric_coords, _ = ( 59 | rasterize_barycentric( 60 | clip_coordinates, 61 | triangles, 62 | self.image_width, 63 | self.image_height)) 64 | image = torch.cat( 65 | [barycentric_coords, 66 | torch.ones([self.image_height, self.image_width, 1])], dim=2) 67 | baseline_image_path = os.path.join(self.test_data_directory, 68 | target_image_name) 69 | test_utils.expect_image_file_and_render_are_near( 70 | self, baseline_image_path, image) 71 | 72 | def testRendersSimpleTriangle(self): 73 | self.runTriangleTest((1.0, 1.0, 1.0), "Simple_Triangle.png") 74 | 75 | def testRendersPerspectiveCorrectTriangle(self): 76 | self.runTriangleTest((0.2, 0.5, 2.0), 77 | "Perspective_Corrected_Triangle.png") 78 | 79 | def testRendersTwoCubesInBatch(self): 80 | """Renders a simple cube in two viewpoints to test the python wrapper. 
81 | """ 82 | 83 | vertex_rgb = (self.cube_vertex_positions * 0.5 + 0.5) 84 | vertex_rgba = torch.cat([vertex_rgb, torch.ones([8, 1])], dim=1) 85 | 86 | center = torch.tensor([[0, 0, 0]], dtype=torch.float32) 87 | world_up = torch.tensor([[0, 1, 0]], dtype=torch.float32) 88 | look_at_1 = camera_utils.look_at( 89 | torch.tensor([[2, 3, 6]], dtype=torch.float32), 90 | center, 91 | world_up) 92 | look_at_2 = camera_utils.look_at( 93 | torch.tensor([[-3, 1, 6]], dtype=torch.float32), 94 | center, 95 | world_up) 96 | projection_1 = torch.matmul(self.perspective, look_at_1) 97 | projection_2 = torch.matmul(self.perspective, look_at_2) 98 | projection = torch.cat([projection_1, projection_2], dim=0) 99 | background_value = torch.Tensor([0., 0., 0., 0.]) 100 | 101 | rendered = rasterize( 102 | torch.stack([self.cube_vertex_positions, 103 | self.cube_vertex_positions]), 104 | torch.stack([vertex_rgba, vertex_rgba]), 105 | self.cube_triangles, 106 | projection, 107 | self.image_width, 108 | self.image_height, 109 | background_value) 110 | 111 | for i in (0, 1): 112 | image = rendered[i, :, :, :] 113 | baseline_image_name = "Unlit_Cube_{}.png".format(i) 114 | baseline_image_path = os.path.join(self.test_data_directory, 115 | baseline_image_name) 116 | test_utils.expect_image_file_and_render_are_near( 117 | self, baseline_image_path, image) 118 | 119 | def testSimpleTriangleGradientComputation(self): 120 | """Verify the Jacobian matrix for a single pixel. 121 | 122 | The pixel is in the center of a triangle facing the camera. This makes 123 | it easy to check which entries of the Jacobian might not make sense 124 | without worrying about corner cases. 125 | """ 126 | test_pixel_x = 325 127 | test_pixel_y = 245 128 | 129 | triangles = torch.tensor([[0, 1, 2]], dtype=torch.int32) 130 | 131 | def rasterize_test_pixels(clip_coordinates): 132 | _, barycentric_coords, _ = ( 133 | rasterize_barycentric( 134 | clip_coordinates, 135 | triangles, 136 | self.image_width, 137 | self.image_height)) 138 | 139 | pixels_to_compare = barycentric_coords[ 140 | test_pixel_y: test_pixel_y + 1, test_pixel_x: test_pixel_x + 1, :] 141 | return pixels_to_compare 142 | 143 | test_clip_coordinates = torch.tensor( 144 | [[-0.5, -0.5, 0.8, 1.0], 145 | [0.0, 0.5, 0.3, 1.0], 146 | [0.5, -0.5, 0.3, 1.0]], 147 | dtype=torch.float32, 148 | requires_grad=True) 149 | jacobians_match = torch.autograd.gradcheck( 150 | rasterize_test_pixels, 151 | test_clip_coordinates, 152 | eps=4e-2, 153 | atol=0.1, 154 | rtol=0.01) 155 | self.assertTrue( 156 | jacobians_match, 157 | "Analytical and numerical jacobians have too many relative or " 158 | "absolute outliers") 159 | 160 | def testInternalRenderGradientComputation(self): 161 | """Isolates and verifies the Jacobian matrix for the custom kernel.""" 162 | image_height = 21 163 | image_width = 28 164 | 165 | def get_barycentric_coordinates(clip_coordinates): 166 | _, barycentric_coords, _ = ( 167 | rasterize_barycentric( 168 | clip_coordinates, 169 | self.cube_triangles, 170 | image_width, 171 | image_height)) 172 | return barycentric_coords 173 | 174 | # Precomputed transformation of the simple cube to normalized device 175 | # coordinates, in order to isolate the rasterization gradient. 
176 |         test_clip_coordinates = torch.tensor(
177 |             [[-0.43889722, -0.53184521, 0.85293502, 1.0],
178 |             [-0.37635487, 0.22206162, 0.90555805, 1.0],
179 |             [-0.22849123, 0.76811147, 0.80993629, 1.0],
180 |             [-0.2805393, -0.14092168, 0.71602166, 1.0],
181 |             [0.18631913, -0.62634289, 0.88603103, 1.0],
182 |             [0.16183566, 0.08129397, 0.93020856, 1.0],
183 |             [0.44147962, 0.53497446, 0.85076219, 1.0],
184 |             [0.53008741, -0.31276882, 0.77620775, 1.0]],
185 |             dtype=torch.float32,
186 |             requires_grad=True)
187 |         raster_out = get_barycentric_coordinates(test_clip_coordinates)
188 |         analytical = test_utils.get_analytical_jacobian(
189 |             test_clip_coordinates, raster_out)
190 |         numerical = test_utils.get_numerical_jacobian(
191 |             get_barycentric_coordinates, test_clip_coordinates, eps=4e-2)
192 | 
193 |         jacobians_match, message = (
194 |             test_utils.check_jacobians_are_nearly_equal(
195 |                 analytical, numerical, 0.01, 0.01))
196 |         self.assertTrue(
197 |             jacobians_match,
198 |             "Analytical and numerical jacobians have too many relative or "
199 |             "absolute outliers: " + message)
200 | 
201 | 
202 | if __name__ == "__main__":
203 |     unittest.main()
204 | 
--------------------------------------------------------------------------------
/src/mesh_renderer/render.py:
--------------------------------------------------------------------------------
1 | """
2 | Differentiable 3D rendering of a triangle mesh based on the
3 | sampled un-clipped barycentric approach from Genova 2018.
4 | """
5 | 
6 | from __future__ import absolute_import
7 | from __future__ import division
8 | from __future__ import print_function
9 | 
10 | import torch
11 | 
12 | from ..common import camera_utils
13 | from .rasterize import rasterize
14 | 
15 | 
16 | def render(
17 |         vertices,
18 |         triangles,
19 |         normals,
20 |         diffuse_colors,
21 |         camera_position,
22 |         camera_lookat,
23 |         camera_up,
24 |         light_positions,
25 |         light_intensities,
26 |         image_width,
27 |         image_height,
28 |         specular_colors=None,
29 |         shininess_coefficients=None,
30 |         ambient_color=None,
31 |         fov_y=40.0,
32 |         near_clip=0.01,
33 |         far_clip=10.0):
34 |     """Renders an input scene using phong shading, and returns an output image.
35 | 
36 |     Args:
37 |         vertices: 3D float32 tensor with shape [batch_size, vertex_count, 3]. Each
38 |             triplet is an xyz position in world space.
39 |         triangles: 2D int32 tensor with shape [triangle_count, 3]. Each triplet
40 |             should contain vertex indices describing a triangle such that the
41 |             triangle's normal points toward the viewer if the forward order of the
42 |             triplet defines a clockwise winding of the vertices. Gradients with
43 |             respect to this tensor are not available.
44 |         # TODO: make this use CCW ordering to be consistent with soft_mesh_renderer
45 |         normals: 3D float32 tensor with shape [batch_size, vertex_count, 3]. Each
46 |             triplet is the xyz vertex normal for its corresponding vertex. Each
47 |             vector is assumed to be already normalized.
48 |         diffuse_colors: 3D float32 tensor with shape [batch_size,
49 |             vertex_count, 3]. The RGB diffuse reflection in the range [0, 1] for
50 |             each vertex.
51 |         camera_position: 2D tensor with shape [batch_size, 3] or 1D tensor with
52 |             shape [3] specifying the XYZ world space camera position.
53 |         camera_lookat: 2D tensor with shape [batch_size, 3] or 1D tensor with
54 |             shape [3] containing an XYZ point along the center of the camera's gaze.
55 |         camera_up: 2D tensor with shape [batch_size, 3] or 1D tensor with shape
56 |             [3] containing the up direction for the camera.
The camera will have 57 | no tilt with respect to this direction. 58 | light_positions: a 3D tensor with shape [batch_size, light_count, 3]. The 59 | XYZ position of each light in the scene. In the same coordinate space as 60 | pixel_positions. 61 | light_intensities: a 3D tensor with shape [batch_size, light_count, 3]. 62 | The RGB intensity values for each light. Intensities may be above 1. 63 | image_width: int specifying desired output image width in pixels. 64 | image_height: int specifying desired output image height in pixels. 65 | specular_colors: 3D float32 tensor with shape [batch_size, 66 | vertex_count, 3]. The RGB specular reflection in the range [0, 1] for 67 | each vertex. If supplied, specular reflections will be computed, and 68 | both specular colors and shininess_coefficients are expected. 69 | shininess_coefficients: a 0D-2D float32 tensor with maximum shape 70 | [batch_size, vertex_count]. The phong shininess coefficient of each 71 | vertex. A 0D tensor or float gives a constant shininess coefficient of 72 | all vertices across all batches and images. A 1D tensor must have shape 73 | [batch_size], and a single shininess coefficient per image is used. 74 | ambient_color: a 2D tensor with shape [batch_size, 3]. The RGB ambient 75 | color, which is added to each pixel in the scene. If None, it is 76 | assumed to be black. 77 | fov_y: float, 0D tensor, or 1D tensor with shape [batch_size] specifying 78 | desired output image y field of view in degrees. 79 | near_clip: float, 0D tensor, or 1D tensor with shape [batch_size] 80 | specifying near clipping plane distance. 81 | far_clip: float, 0D tensor, or 1D tensor with shape [batch_size] 82 | specifying far clipping plane distance. 83 | 84 | Returns: 85 | A 4D float32 tensor of shape [batch_size, image_height, image_width, 4] 86 | containing the lit RGBA color values for each image at each pixel. RGB 87 | colors are the intensity values before tonemapping and can be in the range 88 | [0, infinity]. Clipping to the range [0, 1] with np.clip is likely 89 | reasonable for both viewing and training most scenes. More complex scenes 90 | with multiple lights should tone map color values for display only. One 91 | simple tonemapping approach is to rescale color values as x/(1+x); gamma 92 | compression is another common technique. Alpha values are zero for 93 | background pixels and near one for mesh pixels. 94 | Raises: 95 | ValueError: An invalid argument to the method is detected. 
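    Example (a minimal sketch, not part of the original docs; shapes follow
    the argument descriptions above):

        vertices = torch.tensor([[[-0.5, -0.5, 0.0], [0.5, -0.5, 0.0],
                                  [0.0, 0.5, 0.0]]])          # [1, 3, 3]
        normals = torch.tensor([[[0.0, 0.0, 1.0]] * 3])       # [1, 3, 3]
        diffuse = torch.full([1, 3, 3], 0.8)
        triangles = torch.tensor([[0, 1, 2]], dtype=torch.int32)
        rgba = render(
            vertices, triangles, normals, diffuse,
            camera_position=torch.tensor([0.0, 0.0, 6.0]),
            camera_lookat=torch.zeros(3),
            camera_up=torch.tensor([0.0, 1.0, 0.0]),
            light_positions=torch.tensor([[[2.0, 2.0, 6.0]]]),
            light_intensities=torch.ones([1, 1, 3]),
            image_width=640,
            image_height=480)                                 # [1, 480, 640, 4]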
96 | """ 97 | if len(vertices.shape) != 3 or vertices.shape[-1] != 3: 98 | raise ValueError( 99 | "Vertices must have shape [batch_size, vertex_count, 3].") 100 | batch_size = vertices.shape[0] 101 | if len(normals.shape) != 3 or normals.shape[-1] != 3: 102 | raise ValueError( 103 | "Normals must have shape [batch_size, vertex_count, 3].") 104 | if len(light_positions.shape) != 3 or light_positions.shape[-1] != 3: 105 | raise ValueError( 106 | "light_positions must have shape [batch_size, light_count, 3].") 107 | if len(light_intensities.shape) != 3 or light_intensities.shape[-1] != 3: 108 | raise ValueError( 109 | "light_intensities must have shape [batch_size, light_count, 3].") 110 | if len(diffuse_colors.shape) != 3 or diffuse_colors.shape[-1] != 3: 111 | raise ValueError( 112 | "diffuse_colors must have shape [batch_size, vertex_count, 3].") 113 | if (ambient_color is not None and 114 | list(ambient_color.shape) != [batch_size, 3]): 115 | raise ValueError("ambient_color must have shape [batch_size, 3].") 116 | if list(camera_position.shape) == [3]: 117 | camera_position = torch.unsqueeze(camera_position, 0).repeat(batch_size, 1) 118 | elif list(camera_position.shape) != [batch_size, 3]: 119 | raise ValueError( 120 | "camera_position must have shape [batch_size, 3] or [3].") 121 | if list(camera_lookat.shape) == [3]: 122 | camera_lookat = torch.unsqueeze(camera_lookat, 0).repeat(batch_size, 1) 123 | elif list(camera_lookat.shape) != [batch_size, 3]: 124 | raise ValueError( 125 | "camera_lookat must have shape [batch_size, 3] or [3].") 126 | if list(camera_up.shape) == [3]: 127 | camera_up = torch.unsqueeze(camera_up, 0).repeat(batch_size, 1) 128 | elif list(camera_up.shape) != [batch_size, 3]: 129 | raise ValueError("camera_up must have shape [batch_size, 3] or [3].") 130 | if isinstance(fov_y, float): 131 | fov_y = torch.tensor(batch_size * [fov_y], dtype=torch.float32) 132 | elif len(fov_y.shape) == 0: 133 | fov_y = torch.unsqueeze(fov_y, 0).repeat(batch_size) 134 | elif list(fov_y.shape) != [batch_size]: 135 | raise ValueError("fov_y must be a float, a 0D tensor, or a 1D tensor " 136 | "with shape [batch_size].") 137 | if isinstance(near_clip, float): 138 | near_clip = torch.tensor(batch_size * [near_clip], dtype=torch.float32) 139 | elif len(near_clip.shape) == 0: 140 | near_clip = torch.unsqueeze(near_clip, 0).repeat(batch_size) 141 | elif list(near_clip.shape) != [batch_size]: 142 | raise ValueError("near_clip must be a float, a 0D tensor, or a 1D " 143 | "tensor with shape [batch_size].") 144 | if isinstance(far_clip, float): 145 | far_clip = torch.tensor(batch_size * [far_clip], dtype=torch.float32) 146 | elif len(far_clip.shape) == 0: 147 | far_clip = torch.unsqueeze(far_clip, 0).repeat(batch_size) 148 | elif list(far_clip.shape) != [batch_size]: 149 | raise ValueError("far_clip must be a float, a 0D tensor, or a 1D " 150 | "tensor with shape [batch_size].") 151 | if specular_colors is not None and shininess_coefficients is None: 152 | raise ValueError( 153 | "Specular colors were supplied without shininess coefficients.") 154 | if shininess_coefficients is not None and specular_colors is None: 155 | raise ValueError( 156 | "Shininess coefficients were supplied without specular colors.") 157 | if specular_colors is not None: 158 | # Since a 0D float32 tensor is accepted, also accept a float. 
159 | if isinstance(shininess_coefficients, float): 160 | shininess_coefficients = torch.tensor( 161 | shininess_coefficients, dtype=torch.float32) 162 | if len(specular_colors.shape) != 3: 163 | raise ValueError("The specular colors must have shape [batch_size, " 164 | "vertex_count, 3].") 165 | if len(shininess_coefficients.shape) > 2: 166 | raise ValueError("The shininess coefficients must have shape at " 167 | "most [batch_size, vertex_count].") 168 | # If we don't have per-vertex coefficients, we can just reshape the 169 | # input shininess to broadcast later, rather than interpolating an 170 | # additional vertex attribute: 171 | if len(shininess_coefficients.shape) < 2: 172 | vertex_attributes = torch.cat( 173 | [normals, vertices, diffuse_colors, specular_colors], 2) 174 | else: 175 | vertex_attributes = torch.cat( 176 | [ 177 | normals, vertices, diffuse_colors, specular_colors, 178 | torch.unsqueeze(shininess_coefficients, 2) 179 | ], 2) 180 | else: 181 | vertex_attributes = torch.cat([normals, vertices, diffuse_colors], 2) 182 | 183 | camera_matrices = camera_utils.look_at(camera_position, camera_lookat, 184 | camera_up) 185 | 186 | perspective_transforms = camera_utils.perspective( 187 | image_width / image_height, 188 | fov_y, 189 | near_clip, 190 | far_clip) 191 | 192 | clip_space_transforms = torch.matmul(perspective_transforms, camera_matrices) 193 | 194 | pixel_attributes = rasterize( 195 | vertices, vertex_attributes, triangles, 196 | clip_space_transforms, image_width, image_height, 197 | torch.tensor([-1] * vertex_attributes.shape[2])) 198 | 199 | # Extract the interpolated vertex attributes from the pixel buffer and 200 | # supply them to the shader: 201 | pixel_normals = torch.nn.functional.normalize( 202 | pixel_attributes[:, :, :, 0:3], p=2, dim=3) 203 | pixel_positions = pixel_attributes[:, :, :, 3:6] 204 | diffuse_colors = pixel_attributes[:, :, :, 6:9] 205 | if specular_colors is not None: 206 | specular_colors = pixel_attributes[:, :, :, 9:12] 207 | # Retrieve the interpolated shininess coefficients if necessary, or just 208 | # reshape our input for broadcasting: 209 | if len(shininess_coefficients.shape) == 2: 210 | shininess_coefficients = pixel_attributes[:, :, :, 12] 211 | else: 212 | shininess_coefficients = torch.reshape( 213 | shininess_coefficients, [-1, 1, 1]) 214 | 215 | pixel_mask = (diffuse_colors >= 0.0).any(dim=3).type(torch.float32) 216 | 217 | renders = phong_shader( 218 | normals=pixel_normals, 219 | alphas=pixel_mask, 220 | pixel_positions=pixel_positions, 221 | light_positions=light_positions, 222 | light_intensities=light_intensities, 223 | diffuse_colors=diffuse_colors, 224 | camera_position=camera_position if specular_colors is not None else None, 225 | specular_colors=specular_colors, 226 | shininess_coefficients=shininess_coefficients, 227 | ambient_color=ambient_color) 228 | return renders 229 | 230 | 231 | def phong_shader(normals, 232 | alphas, 233 | pixel_positions, 234 | light_positions, 235 | light_intensities, 236 | diffuse_colors=None, 237 | camera_position=None, 238 | specular_colors=None, 239 | shininess_coefficients=None, 240 | ambient_color=None): 241 | """Compute pixelwise lighting from rasterized buffers with the Phong model. 242 | 243 | Args: 244 | normals: a 4D float32 tensor with shape [batch_size, image_height, 245 | image_width, 3]. The inner dimension is the world space XYZ normal 246 | for the corresponding pixel. Should be already normalized. 
247 |         alphas: a 3D float32 tensor with shape [batch_size, image_height,
248 |             image_width]. The inner dimension is the alpha value (transparency)
249 |             for the corresponding pixel.
250 |         pixel_positions: a 4D float32 tensor with shape [batch_size,
251 |             image_height, image_width, 3]. The inner dimension is the world
252 |             space XYZ position for the corresponding pixel.
253 |         light_positions: a 3D tensor with shape [batch_size, light_count, 3].
254 |             The XYZ position of each light in the scene. In the same coordinate
255 |             space as pixel_positions.
256 |         light_intensities: a 3D tensor with shape [batch_size, light_count, 3].
257 |             The RGB intensity values for each light. Intensities may be above 1.
258 |         diffuse_colors: a 4D float32 tensor with shape [batch_size, image_height,
259 |             image_width, 3]. The inner dimension is the diffuse RGB coefficients
260 |             at a pixel in the range [0, 1].
261 |         camera_position: a 2D tensor with shape [batch_size, 3]. The XYZ camera
262 |             position in the scene. If supplied, specular reflections will be
263 |             computed. If not supplied, specular_colors and shininess_coefficients
264 |             are expected to be None. In the same coordinate space as
265 |             pixel_positions.
266 |         specular_colors: a 4D float32 tensor with shape [batch_size,
267 |             image_height, image_width, 3]. The inner dimension is the specular
268 |             RGB coefficients at a pixel in the range [0, 1]. If None, assumed
269 |             to be torch.zeros().
270 |         shininess_coefficients: a 3D float32 tensor that is broadcasted to
271 |             shape [batch_size, image_height, image_width]. The inner dimension
272 |             is the shininess coefficient for the object at a pixel. Dimensions
273 |             that are constant can be given length 1, so [batch_size, 1, 1] and
274 |             [1, 1, 1] are also valid input shapes.
275 |         ambient_color: a 2D tensor with shape [batch_size, 3]. The RGB ambient
276 |             color, which is added to each pixel before tone mapping. If None,
277 |             it is assumed to be torch.zeros().
278 | 
279 |     Returns:
280 |         A 4D float32 tensor of shape [batch_size, image_height, image_width, 4]
281 |         containing the lit RGBA color values for each image at each pixel.
282 |         Colors are in the range [0, 1].
283 | 
284 |     Raises:
285 |         ValueError: An invalid argument to the method is detected.
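    Example (schematic form of the model implemented below, written out for a
    single light with intensity I; k_d/k_s are the diffuse/specular colors,
    n the surface normal, l the direction to the light, r the mirror
    reflection of l about n, v the direction to the camera, and s the
    shininess coefficient):

        color = ambient_color * k_d
                + I * k_d * max(dot(n, l), 0)
                + I * k_s * max(dot(r, v), 0) ** s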
286 | """ 287 | batch_size, image_height, image_width = [s for s in normals.shape[:-1]] 288 | light_count = light_positions.shape[1] 289 | pixel_count = image_height * image_width 290 | # Reshape all values to easily do pixelwise computations: 291 | normals = torch.reshape(normals, [batch_size, -1, 3]) 292 | alphas = torch.reshape(alphas, [batch_size, -1, 1]) 293 | diffuse_colors = torch.reshape(diffuse_colors, [batch_size, -1, 3]) 294 | if camera_position is not None: 295 | specular_colors = torch.reshape(specular_colors, [batch_size, -1, 3]) 296 | 297 | # Ambient component 298 | output_colors = torch.zeros([batch_size, image_height * image_width, 3]) 299 | if ambient_color is not None: 300 | ambient_reshaped = torch.unsqueeze(ambient_color, 1) 301 | output_colors = output_colors + ambient_reshaped * diffuse_colors 302 | 303 | # Diffuse component 304 | pixel_positions = torch.reshape(pixel_positions, [batch_size, -1, 3]) 305 | per_light_pixel_positions = torch.stack( 306 | [pixel_positions] * light_count, 307 | dim=1) # [batch_size, light_count, pixel_count, 3] 308 | directions_to_lights = torch.nn.functional.normalize( 309 | torch.unsqueeze(light_positions, 2) - per_light_pixel_positions, 310 | p=2, 311 | dim=3) # [batch_size, light_count, pixel_count, 3] 312 | # The specular component should only contribute when the light and normal 313 | # face one another (i.e. the dot product is nonnegative): 314 | normals_dot_lights = torch.clamp( 315 | torch.sum( 316 | torch.unsqueeze(normals, 1) * directions_to_lights, dim=3), 317 | 0.0, 1.0) # [batch_size, light_count, pixel_count] 318 | diffuse_output = ( 319 | torch.unsqueeze(diffuse_colors, 1) * 320 | torch.unsqueeze(normals_dot_lights, 3) * 321 | torch.unsqueeze(light_intensities, 2)) 322 | diffuse_output = torch.sum(diffuse_output, dim=1) # [batch_size, pixel_count, 3] 323 | output_colors = output_colors + diffuse_output 324 | 325 | # Specular component 326 | if camera_position is not None: 327 | camera_position = torch.reshape(camera_position, [batch_size, 1, 3]) 328 | mirror_reflection_direction = torch.nn.functional.normalize( 329 | 2.0 * torch.unsqueeze(normals_dot_lights, 3) * torch.unsqueeze( 330 | normals, 1) - directions_to_lights, 331 | p=2, 332 | dim=3) # [batch_size, light_count, pixel_count, 3] 333 | direction_to_camera = torch.nn.functional.normalize( 334 | camera_position - pixel_positions, 335 | p=2, 336 | dim=2) # [batch_size, pixel_count, 3] 337 | reflection_direction_dot_camera_direction = torch.sum( 338 | mirror_reflection_direction * torch.unsqueeze(direction_to_camera, 1), 339 | dim=3) 340 | # The specular component should only contribute when the reflection is 341 | # external: 342 | reflection_direction_dot_camera_direction = torch.clamp( 343 | torch.nn.functional.normalize( 344 | reflection_direction_dot_camera_direction, 345 | p=2, 346 | dim=2), 347 | 0.0, 348 | 1.0) 349 | # The specular component should also only contribute when the diffuse 350 | # component contributes: 351 | reflection_direction_dot_camera_direction = torch.where( 352 | normals_dot_lights != 0.0, 353 | reflection_direction_dot_camera_direction, 354 | torch.zeros_like( 355 | reflection_direction_dot_camera_direction, 356 | dtype=torch.float32)) 357 | # Reshape to support broadcasting the shininess coefficient, which 358 | # rarely varies per-vertex: 359 | reflection_direction_dot_camera_direction = torch.reshape( 360 | reflection_direction_dot_camera_direction, 361 | [batch_size, light_count, image_height, image_width]) 362 | shininess_coefficients 
= torch.unsqueeze(shininess_coefficients, 1) 363 | specularity = torch.reshape( 364 | torch.pow(reflection_direction_dot_camera_direction, 365 | shininess_coefficients), 366 | [batch_size, light_count, pixel_count, 1]) 367 | specular_output = ( 368 | torch.unsqueeze(specular_colors, 1) * specularity * 369 | torch.unsqueeze(light_intensities, 2) 370 | ) 371 | specular_output = torch.sum(specular_output, dim=1) 372 | output_colors = output_colors + specular_output 373 | rgb_images = torch.reshape( 374 | output_colors, 375 | [batch_size, image_height, image_width, 3]) 376 | alpha_images = torch.reshape( 377 | alphas, 378 | [batch_size, image_height, image_width, 1]) 379 | valid_rgb_values = torch.cat(3 * [alpha_images > 0.5], dim=3) 380 | rgb_images = torch.where( 381 | valid_rgb_values, 382 | rgb_images, 383 | torch.zeros_like(rgb_images, dtype=torch.float32)) 384 | return torch.flip( 385 | torch.cat([rgb_images, alpha_images], dim=3), 386 | dims=[1]) 387 | 388 | 389 | def tone_mapper(image, gamma): 390 | """Apply gamma correction to the input image. 391 | 392 | Tone maps the input image batch in order to make scenes with a high dynamic 393 | range viewable. The gamma correction factor is computed separately per 394 | image, but is shared between all provided channels. The exact function 395 | computed is: 396 | 397 | image_out = A*image_in^gamma, where A is an image-wide constant computed 398 | so that the maximum image value is approximately 1. The correction is 399 | applied to all channels. 400 | 401 | Args: 402 | image: 4D float32 tensor with shape [batch_size, image_height, 403 | image_width, channel_count]. The batch of images to tone map. 404 | gamma: 0D float32 nonnegative tensor. Values of gamma below 1 compress 405 | relative contrast in the image, and values above one increase it. 406 | A value of 1 is equivalent to scaling the image to have a max value 407 | of 1. 408 | Returns: 409 | 4D float32 tensor with shape [batch_size, image_height, image_width, 410 | channel_count]. Contains the gamma-corrected images, clipped to the 411 | range [0, 1]. 
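    Example (illustrative): with gamma = 0.5, an image whose values include
    {0.25, 4.0} is first mapped to {0.5, 2.0}; the per-image max is then 2.0,
    so after rescaling and clipping the output values are {0.25, 1.0}.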
412 | """ 413 | batch_size = image.shape[0] 414 | corrected_image = torch.pow(image, gamma) 415 | image_max = torch.max( 416 | torch.reshape(corrected_image, [batch_size, -1]), 1).values 417 | scaled_image = ( 418 | corrected_image / torch.reshape(image_max, [batch_size, 1, 1, 1])) 419 | return torch.clamp(scaled_image, 0.0, 1.0) 420 | -------------------------------------------------------------------------------- /src/mesh_renderer/test_data/Barycentrics_Cube.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/mesh_renderer/test_data/Barycentrics_Cube.png -------------------------------------------------------------------------------- /src/mesh_renderer/test_data/Colored_Cube_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/mesh_renderer/test_data/Colored_Cube_0.png -------------------------------------------------------------------------------- /src/mesh_renderer/test_data/Colored_Cube_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/mesh_renderer/test_data/Colored_Cube_1.png -------------------------------------------------------------------------------- /src/mesh_renderer/test_data/External_Triangle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/mesh_renderer/test_data/External_Triangle.png -------------------------------------------------------------------------------- /src/mesh_renderer/test_data/Gray_Cube_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/mesh_renderer/test_data/Gray_Cube_0.png -------------------------------------------------------------------------------- /src/mesh_renderer/test_data/Gray_Cube_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/mesh_renderer/test_data/Gray_Cube_1.png -------------------------------------------------------------------------------- /src/mesh_renderer/test_data/Inside_Box.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/mesh_renderer/test_data/Inside_Box.png -------------------------------------------------------------------------------- /src/mesh_renderer/test_data/Perspective_Corrected_Triangle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/mesh_renderer/test_data/Perspective_Corrected_Triangle.png -------------------------------------------------------------------------------- /src/mesh_renderer/test_data/Simple_Tetrahedron.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/mesh_renderer/test_data/Simple_Tetrahedron.png
--------------------------------------------------------------------------------
/src/mesh_renderer/test_data/Simple_Triangle.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/mesh_renderer/test_data/Simple_Triangle.png
--------------------------------------------------------------------------------
/src/mesh_renderer/test_data/Unlit_Cube_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/mesh_renderer/test_data/Unlit_Cube_0.png
--------------------------------------------------------------------------------
/src/mesh_renderer/test_data/Unlit_Cube_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrewkchan/pytorch_mesh_renderer/e868188504917b379b91e690d80b5695361c633a/src/mesh_renderer/test_data/Unlit_Cube_1.png
--------------------------------------------------------------------------------
/src/mesh_renderer/test_utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | 
5 | import os
6 | import numpy as np
7 | from skimage import io
8 | import torch
9 | from itertools import product
10 | 
11 | 
12 | def check_jacobians_are_nearly_equal(theoretical,
13 |                                      numerical,
14 |                                      outlier_relative_error_threshold,
15 |                                      max_outlier_fraction,
16 |                                      include_jacobians_in_error_message=False):
17 |     """Compare two Jacobian matrices, allowing for some fraction of outliers.
18 | 
19 |     Args:
20 |         theoretical: 2D numpy array containing a Jacobian matrix with entries
21 |             computed via gradient functions. The layout should be as in the output
22 |             of torch.autograd.gradcheck.get_analytical_jacobian.
23 |         numerical: 2D numpy array of the same shape as theoretical containing a
24 |             Jacobian matrix with entries computed via finite difference
25 |             approximations. The layout should be as in the output of
26 |             torch.autograd.gradcheck.get_numerical_jacobian.
27 |         outlier_relative_error_threshold: float prescribing the max relative
28 |             error (from the finite difference approximation) tolerated before an
29 |             entry is considered an outlier.
30 |         max_outlier_fraction: float defining the max fraction of entries in
31 |             theoretical that may be outliers before the check returns False.
32 |         include_jacobians_in_error_message: bool defining whether the jacobian
33 |             matrices should be included in the return message if the test fails.
34 | 
35 |     Returns:
36 |         A tuple (success: bool, error_msg: str).
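    Example (illustrative sketch; `analytical` and `numerical` are 2D numpy
    arrays of matching shape):

        match, msg = check_jacobians_are_nearly_equal(
            analytical, numerical,
            outlier_relative_error_threshold=0.01,
            max_outlier_fraction=0.01)
        assert match, msg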
37 |     """
38 |     outlier_gradients = np.abs(
39 |         numerical - theoretical) / numerical > outlier_relative_error_threshold
40 |     outlier_fraction = (
41 |         np.count_nonzero(outlier_gradients) / np.prod(numerical.shape[:2]))
42 |     jacobians_match = outlier_fraction <= max_outlier_fraction
43 | 
44 |     message = (
45 |         " %f of theoretical gradients are relative outliers, but the maximum "
46 |         "allowable fraction is %f " % (outlier_fraction, max_outlier_fraction))
47 |     if include_jacobians_in_error_message:
48 |         # The gradient checker convention is the typical Jacobian transposed:
49 |         message += ("\nNumerical Jacobian:\n%s\nTheoretical Jacobian:\n%s" %
50 |                     (repr(numerical.T), repr(theoretical.T)))
51 |     return jacobians_match, message
52 | 
53 | 
54 | def get_analytical_jacobian(input, output):
55 |     """Compute the analytical jacobian for a function with a single
56 |     differentiable argument.
57 |     """
58 |     jacobian = torch.zeros(input.numel(), output.numel())
59 |     grad_output = torch.zeros_like(output)
60 |     flat_grad_output = grad_output.view(-1)
61 | 
62 |     for i in range(flat_grad_output.numel()):
63 |         flat_grad_output.zero_()
64 |         flat_grad_output[i] = 1
65 |         d_x = torch.autograd.grad(output, [input], grad_output,
66 |                                   retain_graph=True, allow_unused=True)[0]
67 | 
68 |         if jacobian.numel() != 0:
69 |             if d_x is None:
70 |                 jacobian[:, i].zero_()
71 |             else:
72 |                 d_x_dense = (d_x.to_dense()
73 |                              if not d_x.layout == torch.strided else d_x)
74 |                 assert jacobian[:, i].numel() == d_x_dense.numel()
75 |                 jacobian[:, i] = d_x_dense.contiguous().view(-1)
76 | 
77 |     return jacobian
78 | 
79 | 
80 | def get_numerical_jacobian(fn, input, eps=1e-3):
81 |     """Compute the numerical Jacobian using finite differences.
82 | 
83 |     Args:
84 |         fn: The function to differentiate.
85 |         input: input to `fn`
86 |         eps: Finite difference epsilon.
87 |     """
88 |     output_size = fn(input).numel()
89 |     jacobian = torch.zeros(input.numel(), output_size)
90 |     x_tensor = input.data
91 |     d_tensor = jacobian
92 |     for d_idx, x_idx in enumerate(product(*[range(m) for m in x_tensor.size()])):
93 |         orig = x_tensor[x_idx].item()
94 |         x_tensor[x_idx] = orig - eps
95 |         outa = fn(input).clone()
96 |         x_tensor[x_idx] = orig + eps
97 |         outb = fn(input).clone()
98 |         x_tensor[x_idx] = orig
99 |         r = (outb - outa) / (2 * eps)
100 |         d_tensor[d_idx] = r.detach().reshape(-1)
101 | 
102 |     return jacobian
103 | 
104 | 
105 | def expect_image_file_and_render_are_near(test_instance,
106 |                                           baseline_path,
107 |                                           result_image,
108 |                                           max_outlier_fraction=0.001,
109 |                                           pixel_error_threshold=0.01):
110 |     """Compares the output of mesh_renderer with an image on disk.
111 | 
112 |     The comparison is soft: the images are considered identical if at most
113 |     max_outlier_fraction of the pixels differ by more than a relative error of
114 |     pixel_error_threshold of the full color value. Note that before comparison,
115 |     mesh renderer values are clipped to the range [0,1].
116 | 
117 |     On failure, writes the result and diff images under /tmp for inspection.
118 | 
119 |     Args:
120 |         test_instance: a python unittest.TestCase instance.
121 |         baseline_path: path to the reference image on disk.
122 |         result_image: the result image, as a Tensor.
123 |         max_outlier_fraction: the maximum fraction of outlier pixels allowed.
124 |         pixel_error_threshold: pixel values are considered different if their
125 |             difference exceeds this amount. Range is 0.0 - 1.0.
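    Example (sketch mirroring how the renderer tests call this helper):

        test_utils.expect_image_file_and_render_are_near(
            self,
            os.path.join(self.test_data_directory, "Simple_Triangle.png"),
            image)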
126 |     """
127 |     baseline_image = io.imread(baseline_path)
128 | 
129 |     test_instance.assertEqual(baseline_image.shape, result_image.shape,
130 |                               "Image shapes {} and {} do not match."
131 |                               .format(baseline_image.shape, result_image.shape))
132 | 
133 |     result_image = result_image.numpy()
134 |     result_image = np.clip(result_image, 0., 1.).copy(order="C")
135 |     baseline_image = baseline_image.astype(float) / 255.0
136 | 
137 |     diff_image = np.abs(baseline_image - result_image)
138 |     outlier_channels = diff_image > pixel_error_threshold
139 |     outlier_pixels = np.any(outlier_channels, axis=2)
140 |     outlier_count = np.count_nonzero(outlier_pixels)
141 |     outlier_fraction = outlier_count / np.prod(baseline_image.shape[:2])
142 |     images_match = outlier_fraction <= max_outlier_fraction
143 | 
144 |     outputs_dir = "/tmp"  # os.environ["TEST_TMPDIR"]
145 |     base_prefix = os.path.splitext(os.path.basename(baseline_path))[0]
146 |     result_output_path = os.path.join(outputs_dir, base_prefix + "_result.png")
147 |     diff_output_path = os.path.join(outputs_dir, base_prefix + "_diff.png")
148 | 
149 |     message = ("{} does not match. ({} of pixels are outliers; {} is allowed.)"
150 |                " Result image written to {}, diff written to {}"
151 |                .format(
152 |                    baseline_path, outlier_fraction,
153 |                    max_outlier_fraction, result_output_path, diff_output_path))
154 | 
155 |     if not images_match:
156 |         io.imsave(result_output_path, (result_image * 255.0).astype(np.uint8))
157 |         diff_image[:, :, 3] = 1.0
158 |         io.imsave(diff_output_path, (diff_image * 255.0).astype(np.uint8))
159 | 
160 |     test_instance.assertTrue(images_match, msg=message)
--------------------------------------------------------------------------------
/src/soft_mesh_renderer/README.md:
--------------------------------------------------------------------------------
1 | # soft_mesh_renderer
2 | 
3 | This package contains a differentiable 3D mesh renderer using the probabilistic rasterization formulation by [Liu et al. 2019 "Soft Rasterizer: A Differentiable Renderer for Image-based 3D Reasoning"](https://arxiv.org/abs/1904.01786). It is an alternate implementation of [SoftRas](https://github.com/ShichenLiu/SoftRas) that I built for my own learning. Compare also the implementation from [PyTorch3D](https://github.com/facebookresearch/pytorch3d).
4 | 
5 | The renderer supports rendering textured triangle meshes to images with diffuse phong shading including multiple lights. Gradients of the image RGBA pixels can be obtained with respect to mesh vertices, texture colors, camera parameters, and lights.
6 | 
7 | The code is unoptimized: it is Python-only, while the original implements the forward and backward passes with dedicated CUDA kernels. I hope it is more readable, though, and that others will find it useful.
8 | 
9 | # Testing
10 | 
11 | Run from the repository root:
12 | ```
13 | python -m src.soft_mesh_renderer.test_rasterize
14 | ```
15 | 
16 | # Usage
17 | 
18 | ## Rendering a shaded mesh
19 | 
20 | Rendering a shaded mesh can be done with the `render` function in `soft_mesh_renderer/render.py`. This function renders an input scene (mesh, lights, and camera) using phong shading, and returns an output image.
21 | 
22 | #### Args:
23 | 
24 | - `vertices`: 3D float32 tensor with shape `[batch_size, vertex_count, 3]`. Each triplet is an xyz position in world space.
25 | - `triangles`: 2D int32 tensor with shape `[triangle_count, 3]`.
Each triplet should contain vertex indices describing a triangle such that the triangle's normal points toward the viewer if the forward order of the triplet defines a counter-clockwise winding of the vertices. Gradients with respect to this tensor are not available.
26 | - `diffuse_colors`: 3D float32 tensor with shape `[batch_size, vertex_count, 3]`. The RGB diffuse reflection in the range `[0, 1]` for each vertex.
27 | - `camera_position`: 2D tensor with shape `[batch_size, 3]` or 1D tensor with shape `[3]` specifying the XYZ world space camera position.
28 | - `camera_lookat`: 2D tensor with shape `[batch_size, 3]` or 1D tensor with shape `[3]` containing an XYZ point along the center of the camera's gaze.
29 | - `camera_up`: 2D tensor with shape `[batch_size, 3]` or 1D tensor with shape
30 | `[3]` containing the up direction for the camera. The camera will have no tilt with respect to this direction.
31 | - `light_positions`: a 3D tensor with shape `[batch_size, light_count, 3]`. The world space XYZ position of each light in the scene.
32 | - `light_intensities`: a 2D tensor with shape `[batch_size, light_count]`. The intensity values for each light. Intensities may be above 1.
33 | - `image_width`: int specifying desired output image width in pixels.
34 | - `image_height`: int specifying desired output image height in pixels.
35 | - `sigma_val`: parameter controlling the sharpness of the coverage distribution for a single triangle. A smaller sigma leads to a sharper distribution.
36 | - `gamma_val`: temperature parameter controlling uniformity of the triangle probability distribution for a pixel in the depth aggregation. When gamma is 0, all probability mass will fall into the triangle with highest z, matching the behavior of z-buffering.
37 | - `fov_y`: float, 0D tensor, or 1D tensor with shape `[batch_size]` specifying desired output image y field of view in degrees.
38 | - `near_clip`: float, 0D tensor, or 1D tensor with shape `[batch_size]` specifying near clipping plane distance.
39 | - `far_clip`: float, 0D tensor, or 1D tensor with shape `[batch_size]` specifying far clipping plane distance.
40 | 
41 | #### Returns:
42 | 
43 | A 4D float32 tensor of shape `[batch_size, image_height, image_width, 4]` containing the lit RGBA color values for each image at each pixel.
44 | - The RGB values are aggregated per-pixel according to the color aggregation formula in [1].
45 | - The alpha values are aggregated per-pixel according to the silhouette formula in [1].
46 | 
47 | [1] Shichen Liu et al, 'Soft Rasterizer: A Differentiable Renderer for Image-based 3D Reasoning'
48 | 
49 | ### Example
50 | 
51 | An example usage of the differentiable mesh renderer to render a teapot can be seen in [`src/examples/example1b.py`](https://github.com/andrewkchan/pytorch_mesh_renderer/blob/master/src/examples/example1b.py).
--------------------------------------------------------------------------------
/src/soft_mesh_renderer/__init__.py:
--------------------------------------------------------------------------------
1 | from .render import render
--------------------------------------------------------------------------------
/src/soft_mesh_renderer/quadtree.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | def intersects(bbox1, bbox2):
4 |     """
5 |     Returns whether two two-dimensional bounding boxes intersect.
6 | 
7 |     Args:
8 |     - bbox1: Tensor of shape [2, 2] where bbox1[0] gives the xy-coordinate
9 |       of the top-left corner and bbox1[1] gives the bottom-right corner.
10 |     - bbox2: Same format as bbox1.
11 | 
12 |     Returns:
13 |     - A boolean indicating whether the bounding boxes intersect.
14 |     """
15 |     return (bbox1[0, 0] <= bbox2[1, 0] and bbox1[1, 0] >= bbox2[0, 0] and
16 |             bbox1[0, 1] <= bbox2[1, 1] and bbox1[1, 1] >= bbox2[0, 1])
17 | 
18 | def contains(bbox, p):
19 |     """
20 |     Returns whether a bounding box contains a 2D point p.
21 | 
22 |     Args:
23 |     - bbox: Tensor of shape [2, 2] where bbox[0] gives the xy-coordinate
24 |       of the top-left corner and bbox[1] gives the bottom-right corner.
25 |     - p: Tensor of shape [2].
26 | 
27 |     Returns:
28 |     - A boolean indicating whether bbox contains p.
29 |     """
30 |     return (p[0] <= bbox[1][0] and p[0] >= bbox[0][0] and
31 |             p[1] <= bbox[1][1] and p[1] >= bbox[0][1])
32 | 
33 | """
34 | Quadtree data structure to store geometric data with associated bounding boxes.
35 | """
36 | MAX_DEPTH = 5
37 | class QuadTreeNode:
38 |     def __init__(self, bbox, depth):
39 |         self.bbox = bbox
40 |         self.depth = depth
41 |         self.data = []
42 |         self.children = []
43 | 
44 |     def insert(self, bbox, data):
45 |         if len(self.children) != 0:
46 |             for child in self.children:
47 |                 if intersects(child.bbox, bbox):
48 |                     child.insert(bbox, data)
49 |         else:
50 |             if len(self.data) != 0 and self.depth < MAX_DEPTH:
51 |                 # subdivide
52 |                 next_depth = self.depth + 1
53 |                 top = self.bbox[0][1]
54 |                 left = self.bbox[0][0]
55 |                 right = self.bbox[1][0]
56 |                 bottom = self.bbox[1][1]
57 | 
58 |                 center = (self.bbox[0] + self.bbox[1]) / 2.
59 |                 self.children = [
60 |                     # top-left
61 |                     QuadTreeNode(
62 |                         torch.stack([
63 |                             self.bbox[0],
64 |                             center
65 |                         ]), next_depth),
66 |                     # top-right
67 |                     QuadTreeNode(
68 |                         torch.stack([
69 |                             torch.tensor([center[0], top]),
70 |                             torch.tensor([right, center[1]])
71 |                         ]), next_depth),
72 |                     # bottom-left
73 |                     QuadTreeNode(
74 |                         torch.stack([
75 |                             torch.tensor([left, center[1]]),
76 |                             torch.tensor([center[0], bottom])
77 |                         ]), next_depth),
78 |                     # bottom-right
79 |                     QuadTreeNode(
80 |                         torch.stack([
81 |                             center,
82 |                             self.bbox[1]
83 |                         ]), next_depth),
84 |                 ]
85 |                 self.data.append((bbox, data))
86 |                 for d_bbox, d in self.data:
87 |                     for child in self.children:
88 |                         if intersects(child.bbox, d_bbox):
89 |                             child.insert(d_bbox, d)
90 |                 self.data = []
91 |             else:
92 |                 self.data.append((bbox, data))
93 | 
94 |     def leaf_for_point(self, p):
95 |         if not contains(self.bbox, p):
96 |             return None
97 |         for child in self.children:
98 |             l = child.leaf_for_point(p)
99 |             if l:
100 |                 return l
101 |         return self
102 | 
--------------------------------------------------------------------------------
/src/soft_mesh_renderer/rasterize.py:
--------------------------------------------------------------------------------
1 | """
2 | Differentiable triangle rasterizer using soft rasterization formulation.
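
In the notation of Liu et al. 2019, each triangle j contributes to pixel i
with probability

    D_j(i) = sigmoid(delta_ij * d^2(i, j) / sigma),

where d(i, j) is the screen-space distance from the pixel to the triangle's
boundary and delta_ij is +1 if the pixel is inside the triangle and -1
otherwise. Colors are then aggregated across triangles with a depth-weighted
softmax whose temperature is gamma (see rasterize() below).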
3 | """
4 | 
5 | from __future__ import absolute_import
6 | from __future__ import division
7 | from __future__ import print_function
8 | 
9 | import torch
10 | 
11 | from ..common import camera_utils
12 | from .quadtree import QuadTreeNode, contains
13 | 
14 | def rasterize(
15 |     world_space_vertices,
16 |     triangles,
17 |     ### vertex attributes
18 |     normals,
19 |     diffuse_colors,
20 |     ### lighting
21 |     light_positions,
22 |     light_intensities,
23 |     ###
24 |     camera_matrices,
25 |     image_width,
26 |     image_height,
27 |     sigma_val,
28 |     gamma_val,
29 |     blur_radius=0.01
30 | ):
31 |     """
32 |     Soft-rasterize a mesh, interpolating vertex attributes, lighting with phong shading,
33 |     and soft-aggregating the result for every pixel.
34 | 
35 |     Args:
36 |         world_space_vertices: 3D float32 tensor of xyz positions with shape
37 |             [batch_size, vertex_count, 3].
38 |         triangles: 2D int32 tensor with shape [triangle_count, 3]. Each triplet
39 |             should contain vertex indices describing a triangle such that the
40 |             triangle's normal points toward the viewer if the forward order of
41 |             the triplet defines a counter-clockwise winding of the vertices. Gradients
42 |             with respect to this tensor are not available.
43 | 
44 |         normals: 3D float32 tensor with shape [batch_size, vertex_count, 3]. Each
45 |             triplet is the xyz vertex normal for its corresponding vertex. Each
46 |             vector is assumed to be already normalized.
47 |         diffuse_colors: 3D float32 tensor with shape [batch_size,
48 |             vertex_count, 3]. The RGB diffuse reflection in the range [0, 1] for
49 |             each vertex.
50 | 
51 |         light_positions: a 3D tensor with shape [batch_size, light_count, 3]. The
52 |             world space XYZ position of each light in the scene.
53 |         light_intensities: a 2D tensor with shape [batch_size, light_count].
54 |             The intensity values for each light. Intensities may be above 1.
55 | 
56 |         camera_matrices: 3D float tensor with shape [batch_size, 4, 4] containing
57 |             model-view-perspective projection matrices.
58 |         image_width: int specifying desired output image width in pixels.
59 |         image_height: int specifying desired output image height in pixels.
60 |         sigma_val: parameter controlling the sharpness of the coverage distribution
61 |             for a single triangle. A smaller sigma leads to a sharper distribution.
62 |         gamma_val: temperature parameter controlling uniformity of the triangle
63 |             probability distribution for a pixel in the depth aggregation.
64 |             When gamma is 0, all probability mass will fall into the triangle
65 |             with highest z, matching the behavior of z-buffering.
66 |         blur_radius: float specifying the cutoff radius of soft-rasterization sampling
67 |             in NDC-space.
68 | 
69 |     Returns:
70 |         A 4D float32 tensor of shape [batch_size, image_height, image_width, 4]
71 |         containing the lit RGBA color values for each image at each pixel.
72 |         The RGB values are aggregated per-pixel according to the color aggregation
73 |         formula in [1].
74 |         The alpha values are aggregated per-pixel according to the silhouette
75 |         formula in [1].
76 | 
77 |     [1] Shichen Liu et al, 'Soft Rasterizer: A Differentiable Renderer for
78 |     Image-based 3D Reasoning'
79 | 
80 |     Raises:
81 |         ValueError: An invalid argument to the method is detected.
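    Example (a minimal sketch; the sigma_val/gamma_val values here are only a
    plausible starting point -- small values on the order of 1e-5 to 1e-4 are
    commonly used with SoftRas-style renderers, and may need tuning):

        images = rasterize(
            world_space_vertices, triangles, normals, diffuse_colors,
            light_positions, light_intensities, camera_matrices,
            image_width=64, image_height=64,
            sigma_val=1e-4, gamma_val=1e-4)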
82 | """ 83 | vertex_count = world_space_vertices.shape[1] 84 | batch_size = world_space_vertices.shape[0] 85 | 86 | clip_space_vertices = camera_utils.transform_homogeneous( 87 | camera_matrices, world_space_vertices) 88 | 89 | batch_images = [] 90 | 91 | for b in range(batch_size): 92 | image = rasterize_batch( 93 | clip_space_vertices[b, :, :], 94 | triangles, 95 | ### vertex attributes 96 | world_space_vertices[b, :, :], 97 | normals[b, :, :], 98 | diffuse_colors[b, :, :], 99 | ### lighting 100 | light_positions[b, :, :], 101 | light_intensities[b, :], 102 | ### 103 | image_width, 104 | image_height, 105 | sigma_val, 106 | gamma_val 107 | ) 108 | batch_images.append(image) 109 | 110 | return torch.stack(batch_images, 0) 111 | 112 | # Returns the signed area of the parallelogram 113 | # with edges v0p and v01. All inputs should be tensors 114 | # of shape [2] or [3]. 115 | # 116 | # The area is positive if point p is on the right side 117 | # of the segment going from v0 to v1 (so that [p, v0, v1] 118 | # winds clockwise) and negative if p is on the left (so 119 | # that [p, v0, v1] winds counter-clockwise). 120 | def edge_function(p, v0, v1): 121 | v0p = p - v0 122 | v01 = v1 - v0 123 | return v0p[0] * v01[1] - v0p[1] * v01[0] 124 | 125 | # Returns barycentric coordinates of a 3D point P w.r.t. triangle v0, v1, v2. 126 | # The input `M_inv` should be the inverse of a 3x3 matrix where the columns are the vertices. 127 | def barycentric(M_inv, p): 128 | return M_inv @ p 129 | 130 | # Returns barycentric coordinates of a point P (in homogeneous 3D coordinates xyz) 131 | # w.r.t. triangle v0, v1, v2, the same for the point on the edge of the triangle nearest to P, 132 | # and the distance between them. 133 | # Args: 134 | # - p: 3D point, a tensor with shape [3]. 135 | # - M: A 3x3 matrix where the columns are the vertices v0, v1, v2 of the triangle. 136 | # - M_inv: The inverse of M. 137 | # 138 | # Returns: 139 | # - bc_p: 1D tensor of shape [3] giving barycentric coordinates for p. 140 | # If p is outside the triangle, one of the coordinates will be negative. 141 | # - mindist_sq: scalar tensor (float) giving the squared distance from p to the nearest point. 142 | # - bc_edge: 1D tensor of shape [3] giving barycentric coordinates for the nearest point 143 | # on the edge of the triangle. 144 | def barycentric_edge(M, M_inv, p): 145 | bc_p = barycentric(M_inv, p) 146 | v01_nearest, t01 = point_to_segment_nearest(p[:2], M[:, 0][:2], M[:, 1][:2]) 147 | v12_nearest, t12 = point_to_segment_nearest(p[:2], M[:, 1][:2], M[:, 2][:2]) 148 | v20_nearest, t20 = point_to_segment_nearest(p[:2], M[:, 2][:2], M[:, 0][:2]) 149 | d = torch.stack([v01_nearest, v12_nearest, v20_nearest]) - p[:2] 150 | mindist_sq, argmin = torch.min(torch.sum(d * d, dim=-1), dim=0) 151 | if argmin == 0: 152 | return bc_p, mindist_sq, torch.stack([1. - t01, t01, torch.tensor(0.)]) 153 | elif argmin == 1: 154 | return bc_p, mindist_sq, torch.stack([torch.tensor(0.), 1. - t12, t12]) 155 | else: 156 | return bc_p, mindist_sq, torch.stack([t20, torch.tensor(0.), 1. - t20]) 157 | 158 | # Returns the point on a 2D line segment which is nearest to the input point, 159 | # and the number t between [0, 1] giving how far that is on the segment. 160 | # 161 | # Args: 162 | # - p: 2D point, a tensor with shape [2] that we want to project on the line segment. 163 | # - a: 2D point, a tensor with shape [2]. Start of the line segment. 164 | # - b: 2D point, a tensor with shape [2]. End of the line segment. 
165 | #
166 | # Returns:
167 | # - x: 2D point, the point on the line segment nearest p.
168 | # - t: Number in [0, 1] giving the normalized distance from `a` to `x`.
169 | def point_to_segment_nearest(p, a, b):
170 |     ab = b - a
171 |     len_ab = torch.linalg.vector_norm(ab, ord=2)
172 |     n = ab / max(len_ab, 1e-12)
173 |     proj_p_n = torch.dot(p - a, n) * n
174 |     t = torch.clamp(torch.dot(proj_p_n, n) / len_ab, 0., 1.)
175 |     x = a + t * ab
176 |     return x, t
177 | 
178 | # Interpolates the per-vertex diffuse colors of the triangle at the given
179 | # barycentric coordinates, then returns the corresponding RGB color with
180 | # Phong shading (diffuse term only) applied to it.
181 | # Returns:
182 | # - a tensor of shape [3] giving the lit RGB value for this pixel
183 | def compute_shaded_color(
184 |     bc,
185 |     triangle,
186 |     ### vertex attributes
187 |     world_space_vertices,
188 |     normals,
189 |     diffuse_colors,
190 |     ### lighting
191 |     light_positions,
192 |     light_intensities,
193 | ):
194 |     light_count = len(light_positions)
195 |     diffuse_color = bc @ diffuse_colors[triangle, :] # [3]
196 |     p = bc @ world_space_vertices[triangle, :] # [3]
197 |     n = torch.nn.functional.normalize(bc @ normals[triangle, :], p=2, dim=-1) # [3]
198 |     dirs_to_lights = torch.nn.functional.normalize(
199 |         light_positions - p, p=2, dim=-1) # [light_count, 3]
200 | 
201 |     # Surfaces should only be illuminated when the light and normal face
202 |     # one another (i.e. the dot product is non-negative).
203 |     normals_dot_lights = torch.clamp(
204 |         torch.sum(dirs_to_lights * n, dim=-1),
205 |         0.0, 1.0) # [light_count]
206 |     diffuse_output = diffuse_color * torch.sum(normals_dot_lights * light_intensities, dim=-1) # [3]
207 | 
208 |     return diffuse_output
209 | 
210 | SHOW_DEBUG_LOGS = False
211 | EPS = 1e-10 # used to give background color a constant small probability
212 | def rasterize_batch(
213 |     clip_space_vertices,
214 |     triangles,
215 |     ### vertex attributes
216 |     world_space_vertices,
217 |     normals,
218 |     diffuse_colors,
219 |     ### lighting
220 |     light_positions,
221 |     light_intensities,
222 |     ###
223 |     image_width,
224 |     image_height,
225 |     sigma_val,
226 |     gamma_val,
227 |     blur_radius=0.01
228 | ):
229 |     """
230 |     Soft-rasterize a mesh already transformed to clip space.
231 |     Non-batched function.
232 | 
233 |     Args:
234 |         clip_space_vertices: 2D float32 tensor of homogeneous vertices (xyzw)
235 |             with shape [vertex_count, 4].
236 |         triangles: 2D int32 tensor with shape [triangle_count, 3]. Each triplet
237 |             should contain vertex indices describing a triangle such that the
238 |             triangle's normal points toward the viewer if the forward order of
239 |             the triplet defines a counter-clockwise winding of the vertices. Gradients
240 |             with respect to this tensor are not available.
241 | 
242 |         world_space_vertices: 2D float32 tensor of xyz positions with shape
243 |             [vertex_count, 3].
244 |         normals: 2D float32 tensor with shape [vertex_count, 3]. Each
245 |             triplet is the xyz vertex normal for its corresponding vertex. Each
246 |             vector is assumed to be already normalized.
247 |         diffuse_colors: 2D float32 tensor with shape [vertex_count, 3]. The RGB
248 |             diffuse reflection in the range [0, 1] for each vertex.
249 | 
250 |         light_positions: a 2D tensor with shape [light_count, 3]. The world space
251 |             XYZ position of each light in the scene.
252 |         light_intensities: a 1D tensor with shape [light_count].
253 |             The intensity values for each light. Intensities may be above 1.
254 | 
255 |         image_width: int specifying desired output image width in pixels.
256 | image_height: int specifying desired output image height in pixels. 257 | sigma_val: parameter controlling the sharpness of the coverage distribution 258 | for a single triangle. A smaller sigma leads to a sharper distribution. 259 | gamma_val: temperature parameter controlling uniformity of the triangle 260 | probability distribution for a pixel in the depth aggregation. 261 | When gamma is 0, all probability mass will fall into the triangle 262 | with highest z, matching the behavior of z-buffering. 263 | blur_radius: float specifying the cutoff radius of soft-rasterization sampling 264 | in NDC-space. 265 | 266 | Returns: 267 | A 3D float32 tensor of shape [image_height, image_width, 4] 268 | containing the lit RGBA color values at each pixel. 269 | The RGB values are aggregated per-pixel according to the color aggregation 270 | formula in [1]. 271 | The alpha values are aggregated per-pixel according to the silhouette 272 | formula in [1]. 273 | 274 | [1] Shichen Liu et al, 'Soft Rasterizer: A Differentiable Renderer for 275 | Image-based 3D Reasoning' 276 | """ 277 | sq_blur_radius = blur_radius**2 278 | result = torch.zeros([image_height, image_width, 4], dtype=torch.float32) 279 | 280 | ndc_face_matrices = torch.zeros([len(triangles), 3, 3], dtype=torch.float32) 281 | ndc_2d_face_matrices_inv = torch.zeros([len(triangles), 3, 3], dtype=torch.float32) 282 | ndc_face_areas = torch.zeros([len(triangles)], dtype=torch.float32) 283 | quadtree = QuadTreeNode(torch.tensor([[-1., -1.], [1., 1.]]), 0) 284 | for i in range(len(triangles)): 285 | triangle = triangles[i] 286 | clip_v012 = clip_space_vertices[triangle] # shape: [3, 4] 287 | clip_v012_w = clip_v012[:, [3]] # shape: [3, 1] 288 | 289 | ndc_M = (clip_v012 / (clip_v012_w)).T[:3, :] # [3, 3], each column is a vertex 290 | ndc_face_matrices[i, :, :] = ndc_M 291 | 292 | ndc_2d_M = ndc_M.clone() 293 | ndc_2d_M[2, :] = torch.tensor([1., 1., 1.]) 294 | try: 295 | ndc_2d_M_inv = ndc_2d_M.inverse() 296 | except Exception: 297 | # NDC-space vertex basis is not invertible, meaning triangle is 298 | # degenerate when projected (zero area). 299 | continue 300 | ndc_2d_face_matrices_inv[i, :, :] = ndc_2d_M_inv 301 | ndc_face_areas[i] = edge_function(ndc_M[:, 0], ndc_M[:, 1], ndc_M[:, 2]) 302 | ndc_bbox = torch.tensor([ 303 | [torch.min(ndc_M[0, :]) - blur_radius, torch.min(ndc_M[1, :]) - blur_radius], 304 | [torch.max(ndc_M[0, :]) + blur_radius, torch.max(ndc_M[1, :]) + blur_radius] 305 | ]) 306 | quadtree.insert(ndc_bbox, i) 307 | 308 | total_samples = 0 309 | for y in range(image_height): 310 | 311 | row_samples_drawn = 0 312 | row_max_samples_drawn = 0 313 | 314 | for x in range(image_width): 315 | ndc_x = 2.0 * ((x + 0.5) / image_width) - 1.0 316 | ndc_y = -2.0 * ((y + 0.5) / image_height) + 1.0 # invert y 317 | ndc_p = torch.tensor([ndc_x, ndc_y, 1.0]) 318 | 319 | soft_weights = torch.zeros([len(triangles)]) 320 | soft_fragments = torch.zeros([len(triangles)]) 321 | soft_colors = torch.zeros([len(triangles), 3]) 322 | 323 | samples_drawn = 0 324 | for triangle_bbox, i in quadtree.leaf_for_point(ndc_p[:2]).data: 325 | triangle = triangles[i] 326 | 327 | clip_v012 = clip_space_vertices[triangle] # shape: [3, 4] 328 | clip_v012_w = clip_v012[:, [3]] # shape: [3, 1] 329 | ndc_M = ndc_face_matrices[i] # [3, 3] 330 | ndc_depths = ndc_M.T[:, [2]] # [3, 1] 331 | if ndc_face_areas[i] > 0: 332 | # Back-face culling: skip triangles facing away from the camera. 
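                    # (Note: with the counter-clockwise-toward-viewer winding
                    # assumed by this renderer, edge_function() gives a
                    # front-facing triangle a negative signed area in NDC, so a
                    # positive area means the face points away from the camera.)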
333 |                     continue
334 |                 elif ndc_face_areas[i] == 0:
335 |                     # Skip degenerate triangles with zero area.
336 |                     continue
337 |                 ndc_2d_M_inv = ndc_2d_face_matrices_inv[i]
338 | 
339 |                 # fast distance culling: check if pixel is outside the
340 |                 # triangle's bounding box inflated by blur_radius
341 |                 if not contains(triangle_bbox, ndc_p[:2]):
342 |                     continue
343 |                 bc_screen, sq_dist, bc_edge_screen = barycentric_edge(
344 |                     # Note: ndc_2d_M_inv is the inverse of `ndc_M` with uniform z-components,
345 |                     # not `ndc_M` itself. This is ok because we only use the `M` matrix in
346 |                     # this function to extract the x and y components of face vertices.
347 |                     ndc_M,
348 |                     ndc_2d_M_inv,
349 |                     ndc_p
350 |                 )
351 |                 is_inside = not torch.any(bc_screen < 0.)
352 | 
353 |                 # slow distance culling: check if pixel is too far from sample point
354 |                 if not is_inside and sq_dist > sq_blur_radius:
355 |                     continue
356 | 
357 |                 # Get perspective-correct barycentric coordinates for the point to sample from
358 |                 # by undoing the perspective projection on the screen-space barycentrics.
359 |                 sample_bc = torch.nn.functional.normalize(
360 |                     # If p is inside the triangle, sample from p itself.
361 |                     # Otherwise, sample from the point inside the triangle nearest to p.
362 |                     (bc_screen if is_inside else bc_edge_screen)
363 |                     / clip_v012_w.T[0],
364 |                     dim=0, p=1
365 |                 ) # [3]
366 | 
367 |                 # Get normalized depth of nearest points in NDC-space.
368 |                 z = sample_bc @ ndc_depths # Range [-1, +1] where -1 is near plane
369 |                 # Map to range (0, 1) where 1.0 is near plane, 0.0 is far plane
370 |                 z = 0.5 - z/2.
371 | 
372 |                 if z < 0.0 or z > 1.0:
373 |                     # Sample point is outside the view volume, so skip it.
374 |                     continue
375 | 
376 |                 soft_colors[i, :3] = compute_shaded_color(
377 |                     sample_bc,
378 |                     triangle,
379 |                     ### vertex attributes
380 |                     world_space_vertices,
381 |                     normals,
382 |                     diffuse_colors,
383 |                     ### lighting
384 |                     light_positions,
385 |                     light_intensities,
386 |                 )
387 | 
388 |                 sgn = 1. if is_inside else -1.
389 |                 soft_fragments[i] = torch.special.expit(sgn * sq_dist / sigma_val)
390 | 
391 |                 # Set these equal to the un-exponentiated logits.
392 |                 # We shouldn't exponentiate until we can adjust the maximum value
393 |                 # below to avoid overflow.
394 |                 soft_weights[i] = z / gamma_val
395 |                 samples_drawn += 1
396 | 
397 |             max_soft_weight = max(torch.max(soft_weights), torch.tensor(EPS / gamma_val))
398 |             soft_weights = soft_fragments * torch.exp(soft_weights - max_soft_weight)
399 | 
400 |             # background weight should never be zero.
401 |             bg_weight = max(torch.exp(EPS / gamma_val - max_soft_weight), EPS)
402 | 
403 |             # normalize all logits
404 |             sum_weights = torch.sum(soft_weights) + bg_weight
405 |             soft_weights = soft_weights / sum_weights
406 | 
407 |             # bg color is transparent, otherwise we'd add `(bg_weight / sum_weights) * bg_color`
408 |             result[y][x][:3] = soft_weights @ soft_colors
409 | 
410 |             # Compute the silhouette score, which is based on the probability that
411 |             # at least 1 triangle covers the pixel. This is 1 - probability that
412 |             # all triangles do not cover the pixel.
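            # Treating each soft_fragments[i] as an independent per-triangle
            # coverage probability D_i, this is the SoftRas silhouette formula
            # from [1]: silhouette = 1 - prod_i (1 - D_i).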
413 | silhouette = 1.0 - torch.prod((1.0 - soft_fragments)) 414 | result[y][x][3] = silhouette 415 | 416 | row_samples_drawn += samples_drawn 417 | row_max_samples_drawn = max(row_max_samples_drawn, samples_drawn) 418 | total_samples += samples_drawn 419 | if SHOW_DEBUG_LOGS: 420 | print("drew {} samples (max={}) for row y={}".format(row_samples_drawn, row_max_samples_drawn, y)) 421 | if SHOW_DEBUG_LOGS: 422 | print("drew {} samples total".format(total_samples)) 423 | 424 | return result -------------------------------------------------------------------------------- /src/soft_mesh_renderer/render.py: -------------------------------------------------------------------------------- 1 | """ 2 | Differentiable 3D rendering of a triangle mesh based on 3 | the soft rasterization formulation from Liu 2019. 4 | """ 5 | 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import torch 11 | 12 | from ..common import camera_utils, meshes 13 | from .rasterize import rasterize 14 | 15 | def render( 16 | vertices, 17 | triangles, 18 | diffuse_colors, 19 | camera_position, 20 | camera_lookat, 21 | camera_up, 22 | light_positions, 23 | light_intensities, 24 | image_width, 25 | image_height, 26 | sigma_val=1e-5, 27 | gamma_val=1e-4, 28 | blur_radius=0.01, 29 | fov_y=40.0, 30 | near_clip=0.01, 31 | far_clip=10.0): 32 | """Soft-renders an input scene using phong shading, and returns an output image. 33 | 34 | Args: 35 | vertices: 3D float32 tensor with shape [batch_size, vertex_count, 3]. Each 36 | triplet is an xyz position in world space. 37 | triangles: 2D int32 tensor with shape [triangle_count, 3]. Each triplet 38 | should contain vertex indices describing a triangle such that the 39 | triangle's normal points toward the viewer if the forward order of the 40 | triplet defines a counter-clockwise winding of the vertices. Gradients with 41 | respect to this tensor are not available. 42 | diffuse_colors: 3D float32 tensor with shape [batch_size, 43 | vertex_count, 3]. The RGB diffuse reflection in the range [0, 1] for 44 | each vertex. 45 | camera_position: 2D tensor with shape [batch_size, 3] or 1D tensor with 46 | shape [3] specifying the XYZ world space camera position. 47 | camera_lookat: 2D tensor with shape [batch_size, 3] or 1D tensor with 48 | shape [3] containing an XYZ point along the center of the camera's gaze. 49 | camera_up: 2D tensor with shape [batch_size, 3] or 1D tensor with shape 50 | [3] containing the up direction for the camera. The camera will have 51 | no tilt with respect to this direction. 52 | light_positions: a 3D tensor with shape [batch_size, light_count, 3]. The 53 | world space XYZ position of each light in the scene. 54 | light_intensities: a 3D tensor with shape [batch_size, light_count]. 55 | The intensity values for each light. Intensities may be above 1. 56 | image_width: int specifying desired output image width in pixels. 57 | image_height: int specifying desired output image height in pixels. 58 | sigma_val: parameter controlling the sharpness of the coverage distribution 59 | for a single triangle. A smaller sigma leads to a sharper distribution. 60 | gamma_val: temperature parameter controlling uniformity of the triangle 61 | probability distribution for a pixel in the depth aggregation. 62 | When gamma is 0, all probability mass will fall into the triangle 63 | with highest z, matching the behavior of z-buffering. 
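            (The default of 1e-4 is small enough to closely approximate hard
            z-buffering; larger values blend in color from occluded triangles.)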
64 | blur_radius: float specifying the cutoff radius of soft-rasterization sampling 65 | in NDC-space. 66 | fov_y: float, 0D tensor, or 1D tensor with shape [batch_size] specifying 67 | desired output image y field of view in degrees. 68 | near_clip: float, 0D tensor, or 1D tensor with shape [batch_size] 69 | specifying near clipping plane distance. 70 | far_clip: float, 0D tensor, or 1D tensor with shape [batch_size] 71 | specifying far clipping plane distance. 72 | 73 | Returns: 74 | A 4D float32 tensor of shape [batch_size, image_height, image_width, 4] 75 | containing the lit RGBA color values for each image at each pixel. 76 | The RGB values are aggregated per-pixel according to the color aggregation 77 | formula in [1]. 78 | The alpha values are aggregated per-pixel according to the silhouette 79 | formula in [1]. 80 | 81 | [1] Shichen Liu et al, 'Soft Rasterizer: A Differentiable Renderer for 82 | Image-based 3D Reasoning' 83 | Raises: 84 | ValueError: An invalid argument to the method is detected. 85 | """ 86 | if len(vertices.shape) != 3 or vertices.shape[-1] != 3: 87 | raise ValueError( 88 | "Vertices must have shape [batch_size, vertex_count, 3].") 89 | if len(triangles.shape) != 2 or triangles.shape[-1] != 3: 90 | raise ValueError( 91 | "Triangles must have shape [triangle_count, 3].") 92 | batch_size = vertices.shape[0] 93 | if len(light_positions.shape) != 3 or light_positions.shape[-1] != 3: 94 | raise ValueError( 95 | "light_positions must have shape [batch_size, light_count, 3].") 96 | if len(light_intensities.shape) != 2: 97 | raise ValueError( 98 | "light_intensities must have shape [batch_size, light_count].") 99 | if len(diffuse_colors.shape) != 3 or diffuse_colors.shape[-1] != 3: 100 | raise ValueError( 101 | "diffuse_colors must have shape [batch_size, vertex_count, 3].") 102 | if list(camera_position.shape) == [3]: 103 | camera_position = torch.unsqueeze(camera_position, 0).repeat(batch_size, 1) 104 | elif list(camera_position.shape) != [batch_size, 3]: 105 | raise ValueError( 106 | "camera_position must have shape [batch_size, 3] or [3].") 107 | if list(camera_lookat.shape) == [3]: 108 | camera_lookat = torch.unsqueeze(camera_lookat, 0).repeat(batch_size, 1) 109 | elif list(camera_lookat.shape) != [batch_size, 3]: 110 | raise ValueError( 111 | "camera_lookat must have shape [batch_size, 3] or [3].") 112 | if list(camera_up.shape) == [3]: 113 | camera_up = torch.unsqueeze(camera_up, 0).repeat(batch_size, 1) 114 | elif list(camera_up.shape) != [batch_size, 3]: 115 | raise ValueError("camera_up must have shape [batch_size, 3] or [3].") 116 | if isinstance(fov_y, float): 117 | fov_y = torch.tensor(batch_size * [fov_y], dtype=torch.float32) 118 | elif len(fov_y.shape) == 0: 119 | fov_y = torch.unsqueeze(fov_y, 0).repeat(batch_size) 120 | elif list(fov_y.shape) != [batch_size]: 121 | raise ValueError("fov_y must be a float, a 0D tensor, or a 1D tensor " 122 | "with shape [batch_size].") 123 | if isinstance(near_clip, float): 124 | near_clip = torch.tensor(batch_size * [near_clip], dtype=torch.float32) 125 | elif len(near_clip.shape) == 0: 126 | near_clip = torch.unsqueeze(near_clip, 0).repeat(batch_size) 127 | elif list(near_clip.shape) != [batch_size]: 128 | raise ValueError("near_clip must be a float, a 0D tensor, or a 1D " 129 | "tensor with shape [batch_size].") 130 | if isinstance(far_clip, float): 131 | far_clip = torch.tensor(batch_size * [far_clip], dtype=torch.float32) 132 | elif len(far_clip.shape) == 0: 133 | far_clip = torch.unsqueeze(far_clip, 
0).repeat(batch_size)
134 |     elif list(far_clip.shape) != [batch_size]:
135 |         raise ValueError("far_clip must be a float, a 0D tensor, or a 1D "
136 |                          "tensor with shape [batch_size].")
137 | 
138 |     camera_matrices = camera_utils.look_at(camera_position, camera_lookat,
139 |                                            camera_up)
140 | 
141 |     perspective_transforms = camera_utils.perspective(
142 |         image_width / image_height,
143 |         fov_y,
144 |         near_clip,
145 |         far_clip)
146 | 
147 |     clip_space_transforms = torch.matmul(perspective_transforms, camera_matrices)
148 |     normals = meshes.compute_vertex_normals(vertices, triangles)
149 | 
150 |     return rasterize(
151 |         vertices,
152 |         triangles,
153 |         ### vertex attributes
154 |         normals,
155 |         diffuse_colors,
156 |         ### lighting
157 |         light_positions,
158 |         light_intensities,
159 |         ###
160 |         clip_space_transforms,
161 |         image_width,
162 |         image_height,
163 |         sigma_val,
164 |         gamma_val, blur_radius  # forward blur_radius so the caller's value isn't silently ignored
165 |     )
--------------------------------------------------------------------------------
/src/soft_mesh_renderer/test_rasterize.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import unittest
4 | 
5 | from .rasterize import rasterize_batch, point_to_segment_nearest
6 | from ..common import debug_utils
7 | 
8 | class RenderTest(unittest.TestCase):
9 |     def test_point_to_segment_nearest(self):
10 |         """
11 |         Test the point_to_segment_nearest function.
12 |         """
13 |         # Test a point that is closest to the middle of the segment.
14 |         point = torch.tensor([1.0, -1.0], dtype=torch.float32)
15 |         segment = torch.tensor([[1.0, 1.0], [-1.0, -1.0]], dtype=torch.float32)
16 |         expected_nearest = torch.tensor([0.0, 0.0], dtype=torch.float32)
17 |         expected_t = 0.5
18 |         nearest, t = point_to_segment_nearest(point, segment[0], segment[1])
19 |         torch.testing.assert_close(expected_nearest, nearest,
20 |             msg="\n\texpected={}\n\tactual={}".format(expected_nearest, nearest))
21 |         torch.testing.assert_close(expected_t, float(t),
22 |             msg="\n\texpected={}\n\tactual={}".format(expected_t, t))
23 | 
24 |         # Test a point that is closest to the start of the segment.
25 |         point = torch.tensor([0.0, 0.0], dtype=torch.float32)
26 |         segment = torch.tensor([[1.0, 0.0], [1.0, 1.0]], dtype=torch.float32)
27 |         expected_nearest = torch.tensor([1.0, 0.0], dtype=torch.float32)
28 |         expected_t = 0.0
29 |         nearest, t = point_to_segment_nearest(point, segment[0], segment[1])
30 |         torch.testing.assert_close(expected_nearest, nearest,
31 |             msg="\n\texpected={}\n\tactual={}".format(expected_nearest, nearest))
32 |         torch.testing.assert_close(expected_t, float(t),
33 |             msg="\n\texpected={}\n\tactual={}".format(expected_t, t))
34 | 
35 |         # Test a point that is closest to the end of the segment.
36 |         point = torch.tensor([0.0, 1.0], dtype=torch.float32)
37 |         segment = torch.tensor([[1.0, 0.0], [1.0, 1.0]], dtype=torch.float32)
38 |         expected_nearest = torch.tensor([1.0, 1.0], dtype=torch.float32)
39 |         expected_t = 1.0
40 |         nearest, t = point_to_segment_nearest(point, segment[0], segment[1])
41 |         torch.testing.assert_close(expected_nearest, nearest,
42 |             msg="\n\texpected={}\n\tactual={}".format(expected_nearest, nearest))
43 |         torch.testing.assert_close(expected_t, float(t),
44 |             msg="\n\texpected={}\n\tactual={}".format(expected_t, t))
45 | 
46 |     def test_single_triangle_forward(self):
47 |         """
48 |         Test the forward rasterization pass by rasterizing a single triangle to a
49 |         small 10x10 image.
The image coverage should look like so if hard-rasterized: 50 | 51 | 0 0 0 0 0 0 0 0 0 H 52 | 0 0 0 0 0 0 0 0 H 1 53 | 0 0 0 0 0 0 0 H 1 1 54 | 0 0 0 0 0 0 H 1 1 1 55 | 0 0 0 0 0 H 1 1 1 1 56 | 0 0 0 0 H 1 1 1 1 1 57 | 0 0 0 H 1 1 1 1 1 1 58 | 0 0 H 1 1 1 1 1 1 1 59 | 0 H 1 1 1 1 1 1 1 1 60 | H 1 1 1 1 1 1 1 1 1 61 | 62 | Where 1 indicates full coverage, 0 is no coverage, and H is half-covered 63 | (for hard-rasterization, this can be either considered in or out). 64 | """ 65 | 66 | # in eye space: z=-1 for all vertices, znear=0.5, zfar=2.5 67 | clip_space_vertices = torch.tensor( 68 | [ 69 | [1.0, -1.0, 0.25, 1.0], 70 | [1.0, 1.0, 0.25, 1.0], 71 | [-1.0, -1.0, 0.25, 1.0], 72 | ], 73 | dtype=torch.float32 74 | ) 75 | triangles = torch.tensor([[0, 1, 2]], dtype=torch.int32) 76 | world_space_vertices = torch.tensor( 77 | [ 78 | [1.0, -1.0, 0.0], 79 | [1.0, 1.0, 0.0], 80 | [-1.0, -1.0, 0.0], 81 | ], 82 | dtype=torch.float32 83 | ) 84 | normals = torch.tensor( 85 | [ 86 | [0.0, 0.0, 1.0], 87 | [0.0, 0.0, 1.0], 88 | [0.0, 0.0, 1.0], 89 | ], 90 | dtype=torch.float32 91 | ) 92 | diffuse_colors = torch.tensor( 93 | [ 94 | [1.0, 0.0, 0.0], 95 | [1.0, 0.0, 0.0], 96 | [1.0, 0.0, 0.0], 97 | ], 98 | dtype=torch.float32 99 | ) 100 | # one light at effectively infinity 101 | light_positions = torch.tensor([[0.0, 0.0, 100000.0]], dtype=torch.float32) 102 | light_intensities = torch.tensor([1.0], dtype=torch.float32) 103 | image_width, image_height = 10, 10 104 | sigma_val = 1e-5 105 | gamma_val = 1e-4 106 | 107 | ############################################################## 108 | # Case 1: blur radius smaller than a single screen-space pixel 109 | ############################################################## 110 | blur_radius = 0.01 111 | output = rasterize_batch( 112 | clip_space_vertices, 113 | triangles, 114 | ### vertex attributes 115 | world_space_vertices, 116 | normals, 117 | diffuse_colors, 118 | ### lighting 119 | light_positions, 120 | light_intensities, 121 | ### 122 | image_width, 123 | image_height, 124 | sigma_val, 125 | gamma_val, 126 | blur_radius 127 | ) 128 | expected_red = torch.tensor([ 129 | [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.], 130 | [0., 0., 0., 0., 0., 0., 0., 0., 1., 1.], 131 | [0., 0., 0., 0., 0., 0., 0., 1., 1., 1.], 132 | [0., 0., 0., 0., 0., 0., 1., 1., 1., 1.], 133 | [0., 0., 0., 0., 0., 1., 1., 1., 1., 1.], 134 | [0., 0., 0., 0., 1., 1., 1., 1., 1., 1.], 135 | [0., 0., 0., 1., 1., 1., 1., 1., 1., 1.], 136 | [0., 0., 1., 1., 1., 1., 1., 1., 1., 1.], 137 | [0., 1., 1., 1., 1., 1., 1., 1., 1., 1.], 138 | [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], 139 | ], dtype=torch.float32) 140 | expected_green = torch.zeros_like(expected_red) 141 | expected_blue = torch.zeros_like(expected_red) 142 | expected_alpha = torch.tensor([ 143 | [0., 0., 0., 0., 0., 0., 0., 0., 0., .5], 144 | [0., 0., 0., 0., 0., 0., 0., 0., .5, 1.], 145 | [0., 0., 0., 0., 0., 0., 0., .5, 1., 1.], 146 | [0., 0., 0., 0., 0., 0., .5, 1., 1., 1.], 147 | [0., 0., 0., 0., 0., .5, 1., 1., 1., 1.], 148 | [0., 0., 0., 0., .5, 1., 1., 1., 1., 1.], 149 | [0., 0., 0., .5, 1., 1., 1., 1., 1., 1.], 150 | [0., 0., .5, 1., 1., 1., 1., 1., 1., 1.], 151 | [0., .5, 1., 1., 1., 1., 1., 1., 1., 1.], 152 | [.5, 1., 1., 1., 1., 1., 1., 1., 1., 1.], 153 | ], dtype=torch.float32) 154 | 155 | torch.testing.assert_close(output[..., 0], expected_red) 156 | torch.testing.assert_close(output[..., 1], expected_green) 157 | torch.testing.assert_close(output[..., 2], expected_blue) 158 | torch.testing.assert_close(output[..., 3], 
expected_alpha)
159 |         ##############################################################
160 |         # Case 2: blur radius spans a single screen-space pixel
161 |         ##############################################################
162 |         # Add a small epsilon to capture samples right on the edge.
163 |         blur_radius2 = 0.1 * np.sqrt(2.0) + 1e-6
164 |         # This will cause samples blur_radius2 away from a triangle to
165 |         # have a nonzero coverage (1e-3) by the triangle. This is needed
166 |         # for samples that lie exactly on the edge to work.
167 |         sigma_val2 = -blur_radius2**2 / torch.special.logit(torch.tensor(1e-3))
168 |         output2 = rasterize_batch(
169 |             clip_space_vertices,
170 |             triangles,
171 |             ### vertex attributes
172 |             world_space_vertices,
173 |             normals,
174 |             diffuse_colors,
175 |             ### lighting
176 |             light_positions,
177 |             light_intensities,
178 |             ###
179 |             image_width,
180 |             image_height,
181 |             sigma_val2,
182 |             gamma_val,
183 |             blur_radius2
184 |         )
185 |         expected_red2 = torch.tensor([
186 |             [0., 0., 0., 0., 0., 0., 0., 0., 1., 1.],
187 |             [0., 0., 0., 0., 0., 0., 0., 1., 1., 1.],
188 |             [0., 0., 0., 0., 0., 0., 1., 1., 1., 1.],
189 |             [0., 0., 0., 0., 0., 1., 1., 1., 1., 1.],
190 |             [0., 0., 0., 0., 1., 1., 1., 1., 1., 1.],
191 |             [0., 0., 0., 1., 1., 1., 1., 1., 1., 1.],
192 |             [0., 0., 1., 1., 1., 1., 1., 1., 1., 1.],
193 |             [0., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
194 |             [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
195 |             [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
196 |         ], dtype=torch.float32)
197 |         expected_green2 = torch.zeros_like(expected_red2)
198 |         expected_blue2 = torch.zeros_like(expected_red2)
199 |         expected_alpha2 = torch.tensor([
200 |             [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0010, 0.5000],
201 |             [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0010, 0.5000, 0.9693],
202 |             [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0010, 0.5000, 0.9990, 0.9693],
203 |             [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0010, 0.5000, 0.9990, 1.0000, 0.9693],
204 |             [0.0000, 0.0000, 0.0000, 0.0000, 0.0010, 0.5000, 0.9990, 1.0000, 1.0000, 0.9693],
205 |             [0.0000, 0.0000, 0.0000, 0.0010, 0.5000, 0.9990, 1.0000, 1.0000, 1.0000, 0.9693],
206 |             [0.0000, 0.0000, 0.0010, 0.5000, 0.9990, 1.0000, 1.0000, 1.0000, 1.0000, 0.9693],
207 |             [0.0000, 0.0010, 0.5000, 0.9990, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 0.9693],
208 |             [0.0010, 0.5000, 0.9990, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 0.9693],
209 |             [0.5000, 0.9693, 0.9693, 0.9693, 0.9693, 0.9693, 0.9693, 0.9693, 0.9693, 0.9693]
210 |         ], dtype=torch.float32)
211 | 
212 |         torch.testing.assert_close(output2[..., 0], expected_red2)
213 |         torch.testing.assert_close(output2[..., 1], expected_green2)
214 |         torch.testing.assert_close(output2[..., 2], expected_blue2)
215 |         torch.testing.assert_close(output2[..., 3], expected_alpha2, atol=1e-04, rtol=0)
216 | 
217 |     def test_optimize_single_triangle_translation(self):
218 |         """
219 |         Test optimizing a single triangle's xy-translation.
220 | 
221 |         The test proceeds by rasterizing a single triangle to a 10x10 image.
222 |         The starting triangle overlaps the target triangle; to reach the
223 |         target, it must be translated to the right by 0.25 NDC units, a
224 |         quarter of the triangle's width.
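        Optimization runs SGD on the translation parameter against the mean
        absolute pixel difference from the target image (see stepfn below).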
225 | """ 226 | translation_x = torch.tensor(0., requires_grad=True) 227 | target_translation_x = 0.25 228 | # in eye space: z=-1 for all vertices, znear=0.5, zfar=2.5 229 | clip_space_vertices = torch.tensor( 230 | [ 231 | [-0.5, 0.0, 0.25, 1.0], 232 | [0.5, 1.0, 0.25, 1.0], 233 | [-0.5, 1.0, 0.25, 1.0], 234 | ], 235 | dtype=torch.float32 236 | ) 237 | triangles = torch.tensor([[0, 1, 2]], dtype=torch.int32) 238 | world_space_vertices = torch.tensor( 239 | [ 240 | [-0.5, 0.0, 0.0], 241 | [0.5, 1.0, 0.0], 242 | [-0.5, 1.0, 0.0], 243 | ], 244 | dtype=torch.float32 245 | ) 246 | normals = torch.tensor( 247 | [ 248 | [0.0, 0.0, 1.0], 249 | [0.0, 0.0, 1.0], 250 | [0.0, 0.0, 1.0], 251 | ], 252 | dtype=torch.float32 253 | ) 254 | diffuse_colors = torch.tensor( 255 | [ 256 | [1.0, 0.0, 0.0], 257 | [1.0, 0.0, 0.0], 258 | [1.0, 0.0, 0.0], 259 | ], 260 | dtype=torch.float32 261 | ) 262 | # one light at effectively infinity 263 | light_positions = torch.tensor([[0.0, 0.0, 100000.0]], dtype=torch.float32) 264 | light_intensities = torch.tensor([1.0], dtype=torch.float32) 265 | image_width, image_height = 10, 10 266 | sigma_val = 1e-5 267 | gamma_val = 1e-1 268 | 269 | # rasterize target image 270 | target_output = rasterize_batch( 271 | clip_space_vertices + torch.tensor([target_translation_x, 0.0, 0.0, 0.0]), 272 | triangles, 273 | ### vertex attributes 274 | world_space_vertices + torch.tensor([target_translation_x, 0.0, 0.0]), 275 | normals, 276 | diffuse_colors, 277 | ### lighting 278 | light_positions, 279 | light_intensities, 280 | ### 281 | image_width, 282 | image_height, 283 | sigma_val, 284 | gamma_val, 285 | 0.01 # target image should not be blurred 286 | ) 287 | 288 | blur_radius = 0.0 289 | sigma_saturation_radius = 0.5 290 | sigma_val = -sigma_saturation_radius**2 / torch.special.logit(torch.tensor(1e-5)) 291 | def stepfn(): 292 | clip_space_translation = torch.zeros_like(clip_space_vertices) 293 | world_space_translation = torch.zeros_like(world_space_vertices) 294 | clip_space_translation[:, 0] = translation_x 295 | world_space_translation[:, 0] = translation_x 296 | 297 | output = rasterize_batch( 298 | clip_space_vertices + clip_space_translation, 299 | triangles, 300 | ### vertex attributes 301 | world_space_vertices + world_space_translation, 302 | normals, 303 | diffuse_colors, 304 | ### lighting 305 | light_positions, 306 | light_intensities, 307 | ### 308 | image_width, 309 | image_height, 310 | sigma_val, 311 | gamma_val, 312 | blur_radius 313 | ) 314 | 315 | loss = torch.mean(torch.abs(output - target_output)) 316 | loss.backward() 317 | return loss 318 | 319 | # optimization loop: rasterize then backwards until optimized 320 | optimizer = torch.optim.SGD([translation_x], 0.7, 0.1) 321 | for e in range(50): 322 | optimizer.zero_grad() 323 | optimizer.step(stepfn) 324 | 325 | pixel_width = 0.2 # 10x10 grid and NDC range from -1.0 to +1.0 326 | torch.testing.assert_close(float(translation_x), target_translation_x, atol=pixel_width/2, rtol=0.0) 327 | 328 | 329 | if __name__ == "__main__": 330 | unittest.main() --------------------------------------------------------------------------------