├── LICENSE.txt ├── README.md ├── docker ├── 10_nvidia.json └── Dockerfile ├── docs ├── img │ ├── cube.png │ ├── earth.png │ ├── envphong.png │ ├── logo.png │ ├── pipe_cube.png │ ├── pipe_earth.png │ ├── pipe_envphong.png │ ├── pose.png │ ├── spot_aa.png │ ├── spot_crop1.png │ ├── spot_crop2.png │ ├── spot_diff1.png │ ├── spot_diff2.png │ ├── spot_peel1.png │ ├── spot_peel2.png │ ├── spot_st.png │ ├── spot_tex.png │ ├── spot_texture.png │ ├── spot_texw.png │ ├── spot_tri.png │ ├── spot_uv.png │ ├── teaser.png │ ├── teaser1.png │ ├── teaser2.png │ ├── teaser3.png │ ├── teaser4.png │ ├── teaser5.png │ ├── thumb.jpg │ └── tri.png └── index.html ├── nvdiffrast ├── __init__.py ├── common │ ├── antialias.cu │ ├── antialias.h │ ├── common.cpp │ ├── common.h │ ├── cudaraster │ │ ├── CudaRaster.hpp │ │ └── impl │ │ │ ├── BinRaster.inl │ │ │ ├── Buffer.cpp │ │ │ ├── Buffer.hpp │ │ │ ├── CoarseRaster.inl │ │ │ ├── Constants.hpp │ │ │ ├── CudaRaster.cpp │ │ │ ├── Defs.hpp │ │ │ ├── FineRaster.inl │ │ │ ├── PrivateDefs.hpp │ │ │ ├── RasterImpl.cpp │ │ │ ├── RasterImpl.cu │ │ │ ├── RasterImpl.hpp │ │ │ ├── TriangleSetup.inl │ │ │ └── Util.inl │ ├── framework.h │ ├── glutil.cpp │ ├── glutil.h │ ├── glutil_extlist.h │ ├── interpolate.cu │ ├── interpolate.h │ ├── rasterize.cu │ ├── rasterize.h │ ├── rasterize_gl.cpp │ ├── rasterize_gl.h │ ├── texture.cpp │ ├── texture.cu │ └── texture.h ├── lib │ └── setgpu.lib ├── tensorflow │ ├── __init__.py │ ├── ops.py │ ├── plugin_loader.py │ ├── tf_all.cu │ ├── tf_antialias.cu │ ├── tf_interpolate.cu │ ├── tf_rasterize.cu │ └── tf_texture.cu └── torch │ ├── __init__.py │ ├── ops.py │ ├── torch_antialias.cpp │ ├── torch_bindings.cpp │ ├── torch_bindings_gl.cpp │ ├── torch_common.inl │ ├── torch_interpolate.cpp │ ├── torch_rasterize.cpp │ ├── torch_rasterize_gl.cpp │ ├── torch_texture.cpp │ └── torch_types.h ├── run_sample.sh ├── samples ├── data │ ├── NOTICE.txt │ ├── cube_c.npz │ ├── cube_d.npz │ ├── cube_p.npz │ ├── earth.npz │ └── 
envphong.npz ├── tensorflow │ ├── cube.py │ ├── earth.py │ ├── envphong.py │ ├── pose.py │ ├── triangle.py │ └── util.py └── torch │ ├── cube.py │ ├── earth.py │ ├── envphong.py │ ├── pose.py │ ├── triangle.py │ └── util.py └── setup.py /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2020, NVIDIA Corporation. All rights reserved. 2 | 3 | 4 | Nvidia Source Code License (1-Way Commercial) 5 | 6 | ======================================================================= 7 | 8 | 1. Definitions 9 | 10 | "Licensor" means any person or entity that distributes its Work. 11 | 12 | "Software" means the original work of authorship made available under 13 | this License. 14 | 15 | "Work" means the Software and any additions to or derivative works of 16 | the Software that are made available under this License. 17 | 18 | The terms "reproduce," "reproduction," "derivative works," and 19 | "distribution" have the meaning as provided under U.S. copyright law; 20 | provided, however, that for the purposes of this License, derivative 21 | works shall not include works that remain separable from, or merely 22 | link (or bind by name) to the interfaces of, the Work. 23 | 24 | Works, including the Software, are "made available" under this License 25 | by including in or with the Work either (a) a copyright notice 26 | referencing the applicability of this License to the Work, or (b) a 27 | copy of this License. 28 | 29 | 2. License Grants 30 | 31 | 2.1 Copyright Grant. Subject to the terms and conditions of this 32 | License, each Licensor grants to you a perpetual, worldwide, 33 | non-exclusive, royalty-free, copyright license to reproduce, 34 | prepare derivative works of, publicly display, publicly perform, 35 | sublicense and distribute its Work and any resulting derivative 36 | works in any form. 37 | 38 | 3. Limitations 39 | 40 | 3.1 Redistribution. 
You may reproduce or distribute the Work only 41 | if (a) you do so under this License, (b) you include a complete 42 | copy of this License with your distribution, and (c) you retain 43 | without modification any copyright, patent, trademark, or 44 | attribution notices that are present in the Work. 45 | 46 | 3.2 Derivative Works. You may specify that additional or different 47 | terms apply to the use, reproduction, and distribution of your 48 | derivative works of the Work ("Your Terms") only if (a) Your Terms 49 | provide that the use limitation in Section 3.3 applies to your 50 | derivative works, and (b) you identify the specific derivative 51 | works that are subject to Your Terms. Notwithstanding Your Terms, 52 | this License (including the redistribution requirements in Section 53 | 3.1) will continue to apply to the Work itself. 54 | 55 | 3.3 Use Limitation. The Work and any derivative works thereof only 56 | may be used or intended for use non-commercially. The Work or 57 | derivative works thereof may be used or intended for use by Nvidia 58 | or its affiliates commercially or non-commercially. As used herein, 59 | "non-commercially" means for research or evaluation purposes only 60 | and not for any direct or indirect monetary gain. 61 | 62 | 3.4 Patent Claims. If you bring or threaten to bring a patent claim 63 | against any Licensor (including any claim, cross-claim or 64 | counterclaim in a lawsuit) to enforce any patents that you allege 65 | are infringed by any Work, then your rights under this License from 66 | such Licensor (including the grant in Section 2.1) will terminate 67 | immediately. 68 | 69 | 3.5 Trademarks. This License does not grant any rights to use any 70 | Licensor's or its affiliates' names, logos, or trademarks, except 71 | as necessary to reproduce the notices described in this License. 72 | 73 | 3.6 Termination. 
If you violate any term of this License, then your 74 | rights under this License (including the grant in Section 2.1) will 75 | terminate immediately. 76 | 77 | 4. Disclaimer of Warranty. 78 | 79 | THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY 80 | KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF 81 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR 82 | NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER 83 | THIS LICENSE. 84 | 85 | 5. Limitation of Liability. 86 | 87 | EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL 88 | THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE 89 | SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, 90 | INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF 91 | OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK 92 | (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, 93 | LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER 94 | COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF 95 | THE POSSIBILITY OF SUCH DAMAGES. 96 | 97 | ======================================================================= 98 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Nvdiffrast – Modular Primitives for High-Performance Differentiable Rendering 2 | 3 | ![Teaser image](./docs/img/teaser.png) 4 | 5 | **Modular Primitives for High-Performance Differentiable Rendering**
6 | Samuli Laine, Janne Hellsten, Tero Karras, Yeongho Seol, Jaakko Lehtinen, Timo Aila
7 | [http://arxiv.org/abs/2011.03277](http://arxiv.org/abs/2011.03277) 8 | 9 | Nvdiffrast is a PyTorch/TensorFlow library that provides high-performance primitive operations for rasterization-based differentiable rendering. 10 | Please refer to ☞☞ [nvdiffrast documentation](https://nvlabs.github.io/nvdiffrast) ☜☜ for more information. 11 | 12 | ## Licenses 13 | 14 | Copyright © 2020–2024, NVIDIA Corporation. All rights reserved. 15 | 16 | This work is made available under the [Nvidia Source Code License](https://github.com/NVlabs/nvdiffrast/blob/main/LICENSE.txt). 17 | 18 | For business inquiries, please visit our website and submit the form: [NVIDIA Research Licensing](https://www.nvidia.com/en-us/research/inquiries/) 19 | 20 | We do not currently accept outside code contributions in the form of pull requests. 21 | 22 | Environment map stored as part of `samples/data/envphong.npz` is derived from a Wave Engine 23 | [sample material](https://github.com/WaveEngine/Samples-2.5/tree/master/Materials/EnvironmentMap/Content/Assets/CubeMap.cubemap) 24 | originally shared under 25 | [MIT License](https://github.com/WaveEngine/Samples-2.5/blob/master/LICENSE.md). 26 | Mesh and texture stored as part of `samples/data/earth.npz` are derived from 27 | [3D Earth Photorealistic 2K](https://www.turbosquid.com/3d-models/3d-realistic-earth-photorealistic-2k-1279125) 28 | model originally made available under 29 | [TurboSquid 3D Model License](https://blog.turbosquid.com/turbosquid-3d-model-license/#3d-model-license). 
30 | 31 | ## Citation 32 | 33 | ``` 34 | @article{Laine2020diffrast, 35 | title = {Modular Primitives for High-Performance Differentiable Rendering}, 36 | author = {Samuli Laine and Janne Hellsten and Tero Karras and Yeongho Seol and Jaakko Lehtinen and Timo Aila}, 37 | journal = {ACM Transactions on Graphics}, 38 | year = {2020}, 39 | volume = {39}, 40 | number = {6} 41 | } 42 | ``` 43 | -------------------------------------------------------------------------------- /docker/10_nvidia.json: -------------------------------------------------------------------------------- 1 | { 2 | "file_format_version" : "1.0.0", 3 | "ICD" : { 4 | "library_path" : "libEGL_nvidia.so.0" 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | # Note: This file uses NVIDIA's Docker image build 10 | # 11 | # nvcr.io/nvidia/pytorch:23.03-py3 12 | # 13 | # but should also work with other PyTorch images such as pytorch/pytorch, 14 | # which may run on slightly older driver versions.
15 | FROM nvcr.io/nvidia/pytorch:23.03-py3 16 | 17 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 18 | pkg-config \ 19 | libglvnd0 \ 20 | libgl1 \ 21 | libglx0 \ 22 | libegl1 \ 23 | libgles2 \ 24 | libglvnd-dev \ 25 | libgl1-mesa-dev \ 26 | libegl1-mesa-dev \ 27 | libgles2-mesa-dev \ 28 | cmake \ 29 | curl 30 | 31 | ENV PYTHONDONTWRITEBYTECODE=1 32 | ENV PYTHONUNBUFFERED=1 33 | 34 | # for GLEW 35 | ENV LD_LIBRARY_PATH /usr/lib64:$LD_LIBRARY_PATH 36 | 37 | # nvidia-container-runtime 38 | ENV NVIDIA_VISIBLE_DEVICES all 39 | ENV NVIDIA_DRIVER_CAPABILITIES compute,utility,graphics 40 | 41 | # Default pyopengl to EGL for good headless rendering support 42 | ENV PYOPENGL_PLATFORM egl 43 | 44 | COPY docker/10_nvidia.json /usr/share/glvnd/egl_vendor.d/10_nvidia.json 45 | 46 | RUN pip install --upgrade pip 47 | RUN pip install ninja imageio imageio-ffmpeg 48 | 49 | COPY nvdiffrast /tmp/pip/nvdiffrast/ 50 | COPY README.md setup.py /tmp/pip/ 51 | RUN cd /tmp/pip && pip install . 
52 | -------------------------------------------------------------------------------- /docs/img/cube.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/cube.png -------------------------------------------------------------------------------- /docs/img/earth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/earth.png -------------------------------------------------------------------------------- /docs/img/envphong.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/envphong.png -------------------------------------------------------------------------------- /docs/img/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/logo.png -------------------------------------------------------------------------------- /docs/img/pipe_cube.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/pipe_cube.png -------------------------------------------------------------------------------- /docs/img/pipe_earth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/pipe_earth.png -------------------------------------------------------------------------------- /docs/img/pipe_envphong.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/pipe_envphong.png -------------------------------------------------------------------------------- /docs/img/pose.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/pose.png -------------------------------------------------------------------------------- /docs/img/spot_aa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/spot_aa.png -------------------------------------------------------------------------------- /docs/img/spot_crop1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/spot_crop1.png -------------------------------------------------------------------------------- /docs/img/spot_crop2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/spot_crop2.png -------------------------------------------------------------------------------- /docs/img/spot_diff1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/spot_diff1.png -------------------------------------------------------------------------------- /docs/img/spot_diff2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/spot_diff2.png 
-------------------------------------------------------------------------------- /docs/img/spot_peel1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/spot_peel1.png -------------------------------------------------------------------------------- /docs/img/spot_peel2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/spot_peel2.png -------------------------------------------------------------------------------- /docs/img/spot_st.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/spot_st.png -------------------------------------------------------------------------------- /docs/img/spot_tex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/spot_tex.png -------------------------------------------------------------------------------- /docs/img/spot_texture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/spot_texture.png -------------------------------------------------------------------------------- /docs/img/spot_texw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/spot_texw.png -------------------------------------------------------------------------------- /docs/img/spot_tri.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/spot_tri.png -------------------------------------------------------------------------------- /docs/img/spot_uv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/spot_uv.png -------------------------------------------------------------------------------- /docs/img/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/teaser.png -------------------------------------------------------------------------------- /docs/img/teaser1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/teaser1.png -------------------------------------------------------------------------------- /docs/img/teaser2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/teaser2.png -------------------------------------------------------------------------------- /docs/img/teaser3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/teaser3.png -------------------------------------------------------------------------------- /docs/img/teaser4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/teaser4.png 
-------------------------------------------------------------------------------- /docs/img/teaser5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/teaser5.png -------------------------------------------------------------------------------- /docs/img/thumb.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/thumb.jpg -------------------------------------------------------------------------------- /docs/img/tri.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/tri.png -------------------------------------------------------------------------------- /nvdiffrast/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | __version__ = '0.3.3' 10 | -------------------------------------------------------------------------------- /nvdiffrast/common/antialias.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #pragma once 10 | #include "common.h" 11 | 12 | //------------------------------------------------------------------------ 13 | // Constants and helpers. 14 | 15 | #define AA_DISCONTINUITY_KERNEL_BLOCK_WIDTH 32 16 | #define AA_DISCONTINUITY_KERNEL_BLOCK_HEIGHT 8 17 | #define AA_ANALYSIS_KERNEL_THREADS_PER_BLOCK 256 18 | #define AA_MESH_KERNEL_THREADS_PER_BLOCK 256 19 | #define AA_HASH_ELEMENTS_PER_TRIANGLE(alloc) ((alloc) >= (2 << 25) ? 4 : 8) // With more than 16777216 triangles (alloc >= 33554432) use smallest possible value of 4 to conserve memory, otherwise use 8 for fewer collisions. 20 | #define AA_LOG_HASH_ELEMENTS_PER_TRIANGLE(alloc) ((alloc) >= (2 << 25) ? 2 : 3) 21 | #define AA_GRAD_KERNEL_THREADS_PER_BLOCK 256 22 | 23 | //------------------------------------------------------------------------ 24 | // CUDA kernel params. 25 | 26 | struct AntialiasKernelParams 27 | { 28 | const float* color; // Incoming color buffer. 29 | const float* rasterOut; // Incoming rasterizer output buffer. 30 | const int* tri; // Incoming triangle buffer. 31 | const float* pos; // Incoming position buffer. 32 | float* output; // Output buffer of forward kernel. 33 | const float* dy; // Incoming gradients. 34 | float* gradColor; // Output buffer, color gradient. 35 | float* gradPos; // Output buffer, position gradient. 36 | int4* workBuffer; // Buffer for storing intermediate work items. First item reserved for counters. 37 | uint4* evHash; // Edge-vertex hash. 38 | int allocTriangles; // Number of triangles accommodated by evHash. Always power of two. 
39 | int numTriangles; // Number of triangles. 40 | int numVertices; // Number of vertices. 41 | int width; // Input width. 42 | int height; // Input height. 43 | int n; // Minibatch size. 44 | int channels; // Channel count in color input. 45 | float xh, yh; // Transfer to pixel space. 46 | int instance_mode; // 0=normal, 1=instance mode. 47 | int tri_const; // 1 if triangle array is known to be constant. 48 | }; 49 | 50 | //------------------------------------------------------------------------ 51 | -------------------------------------------------------------------------------- /nvdiffrast/common/common.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #include 10 | 11 | //------------------------------------------------------------------------ 12 | // Block and grid size calculators for kernel launches. 13 | 14 | dim3 getLaunchBlockSize(int maxWidth, int maxHeight, int width, int height) 15 | { 16 | int maxThreads = maxWidth * maxHeight; 17 | if (maxThreads <= 1 || (width * height) <= 1) 18 | return dim3(1, 1, 1); // Degenerate. 19 | 20 | // Start from max size. 21 | int bw = maxWidth; 22 | int bh = maxHeight; 23 | 24 | // Optimizations for weirdly sized buffers. 25 | if (width < bw) 26 | { 27 | // Decrease block width to smallest power of two that covers the buffer width. 28 | while ((bw >> 1) >= width) 29 | bw >>= 1; 30 | 31 | // Maximize height. 
32 | bh = maxThreads / bw; 33 | if (bh > height) 34 | bh = height; 35 | } 36 | else if (height < bh) 37 | { 38 | // Halve height and double width until fits completely inside buffer vertically. 39 | while (bh > height) 40 | { 41 | bh >>= 1; 42 | if (bw < width) 43 | bw <<= 1; 44 | } 45 | } 46 | 47 | // Done. 48 | return dim3(bw, bh, 1); 49 | } 50 | 51 | dim3 getLaunchGridSize(dim3 blockSize, int width, int height, int depth) 52 | { 53 | dim3 gridSize; 54 | gridSize.x = (width - 1) / blockSize.x + 1; 55 | gridSize.y = (height - 1) / blockSize.y + 1; 56 | gridSize.z = (depth - 1) / blockSize.z + 1; 57 | return gridSize; 58 | } 59 | 60 | //------------------------------------------------------------------------ 61 | -------------------------------------------------------------------------------- /nvdiffrast/common/cudaraster/CudaRaster.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2009-2022, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #pragma once 10 | 11 | //------------------------------------------------------------------------ 12 | // This is a slimmed-down and modernized version of the original 13 | // CudaRaster codebase that accompanied the HPG 2011 paper 14 | // "High-Performance Software Rasterization on GPUs" by Laine and Karras. 15 | // Modifications have been made to accommodate post-Volta execution model 16 | // with warp divergence. Support for shading, blending, quad rendering, 17 | // and supersampling have been removed as unnecessary for nvdiffrast. 
18 | //------------------------------------------------------------------------ 19 | 20 | namespace CR 21 | { 22 | 23 | class RasterImpl; 24 | 25 | //------------------------------------------------------------------------ 26 | // Interface class to isolate user from implementation details. 27 | //------------------------------------------------------------------------ 28 | 29 | class CudaRaster 30 | { 31 | public: 32 | enum 33 | { 34 | RenderModeFlag_EnableBackfaceCulling = 1 << 0, // Enable backface culling. 35 | RenderModeFlag_EnableDepthPeeling = 1 << 1, // Enable depth peeling. Must have a peel buffer set. 36 | }; 37 | 38 | public: 39 | CudaRaster (void); 40 | ~CudaRaster (void); 41 | 42 | void setBufferSize (int width, int height, int numImages); // Width and height are internally rounded up to multiples of tile size (8x8) for buffer sizes. 43 | void setViewport (int width, int height, int offsetX, int offsetY); // Tiled rendering viewport setup. 44 | void setRenderModeFlags (unsigned int renderModeFlags); // Affects all subsequent calls to drawTriangles(). Defaults to zero. 45 | void deferredClear (unsigned int clearColor); // Clears color and depth buffers during next call to drawTriangles(). 46 | void setVertexBuffer (void* vertices, int numVertices); // GPU pointer managed by caller. Vertex positions in clip space as float4 (x, y, z, w). 47 | void setIndexBuffer (void* indices, int numTriangles); // GPU pointer managed by caller. Triangle index+color quadruplets as uint4 (idx0, idx1, idx2, color). 48 | bool drawTriangles (const int* ranges, bool peel, cudaStream_t stream); // Ranges (offsets and counts) as #triangles entries, not as bytes. If NULL, draw all triangles. Returns false in case of internal overflow. 49 | void* getColorBuffer (void); // GPU pointer managed by CudaRaster. 50 | void* getDepthBuffer (void); // GPU pointer managed by CudaRaster. 51 | void swapDepthAndPeel (void); // Swap depth and peeling buffers. 
52 | 53 | private: 54 | CudaRaster (const CudaRaster&); // forbidden 55 | CudaRaster& operator= (const CudaRaster&); // forbidden 56 | 57 | private: 58 | RasterImpl* m_impl; // Opaque pointer to implementation. 59 | }; 60 | 61 | //------------------------------------------------------------------------ 62 | } // namespace CR 63 | 64 | -------------------------------------------------------------------------------- /nvdiffrast/common/cudaraster/impl/Buffer.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2009-2022, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #include "../../framework.h" 10 | #include "Buffer.hpp" 11 | 12 | using namespace CR; 13 | 14 | //------------------------------------------------------------------------ 15 | // GPU buffer. 16 | //------------------------------------------------------------------------ 17 | 18 | Buffer::Buffer(void) 19 | : m_gpuPtr(NULL), 20 | m_bytes (0) 21 | { 22 | // empty 23 | } 24 | 25 | Buffer::~Buffer(void) 26 | { 27 | if (m_gpuPtr) 28 | cudaFree(m_gpuPtr); // Don't throw an exception. 
29 | } 30 | 31 | void Buffer::reset(size_t bytes) 32 | { 33 | if (bytes == m_bytes) 34 | return; 35 | 36 | if (m_gpuPtr) 37 | { 38 | NVDR_CHECK_CUDA_ERROR(cudaFree(m_gpuPtr)); 39 | m_gpuPtr = NULL; 40 | } 41 | 42 | if (bytes > 0) 43 | NVDR_CHECK_CUDA_ERROR(cudaMalloc(&m_gpuPtr, bytes)); 44 | 45 | m_bytes = bytes; 46 | } 47 | 48 | void Buffer::grow(size_t bytes) 49 | { 50 | if (bytes > m_bytes) 51 | reset(bytes); 52 | } 53 | 54 | //------------------------------------------------------------------------ 55 | // Host buffer with page-locked memory. 56 | //------------------------------------------------------------------------ 57 | 58 | HostBuffer::HostBuffer(void) 59 | : m_hostPtr(NULL), 60 | m_bytes (0) 61 | { 62 | // empty 63 | } 64 | 65 | HostBuffer::~HostBuffer(void) 66 | { 67 | if (m_hostPtr) 68 | cudaFreeHost(m_hostPtr); // Don't throw an exception. 69 | } 70 | 71 | void HostBuffer::reset(size_t bytes) 72 | { 73 | if (bytes == m_bytes) 74 | return; 75 | 76 | if (m_hostPtr) 77 | { 78 | NVDR_CHECK_CUDA_ERROR(cudaFreeHost(m_hostPtr)); 79 | m_hostPtr = NULL; 80 | } 81 | 82 | if (bytes > 0) 83 | NVDR_CHECK_CUDA_ERROR(cudaMallocHost(&m_hostPtr, bytes)); 84 | 85 | m_bytes = bytes; 86 | } 87 | 88 | void HostBuffer::grow(size_t bytes) 89 | { 90 | if (bytes > m_bytes) 91 | reset(bytes); 92 | } 93 | 94 | //------------------------------------------------------------------------ 95 | -------------------------------------------------------------------------------- /nvdiffrast/common/cudaraster/impl/Buffer.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2009-2022, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. 
// Any use, reproduction, disclosure or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA CORPORATION is strictly prohibited.

#pragma once
#include "Defs.hpp"

namespace CR
{
//------------------------------------------------------------------------
// Owning wrapper for a raw cudaMalloc allocation (implementation in Buffer.cpp).

class Buffer
{
public:
                    Buffer      (void);
                    ~Buffer     (void);

    // Resize to exactly 'bytes'; contents not preserved (see Buffer.cpp).
    void            reset       (size_t bytes);
    // Grow to at least 'bytes'; never shrinks.
    void            grow        (size_t bytes);
    // Pointer arithmetic via uintptr_t so a byte offset can be applied to the raw allocation.
    void*           getPtr      (size_t offset = 0) { return (void*)(((uintptr_t)m_gpuPtr) + offset); }
    size_t          getSize     (void) const { return m_bytes; }

    // NOTE(review): plain assignment — does not free any existing allocation and
    // does not update m_bytes; callers are responsible for ownership. Verify intent.
    void            setPtr      (void* ptr) { m_gpuPtr = ptr; }

private:
    void*           m_gpuPtr;   // GPU pointer; NULL when empty.
    size_t          m_bytes;    // Current allocation size in bytes.
};

//------------------------------------------------------------------------
// Owning wrapper for a page-locked (cudaMallocHost) host allocation.

class HostBuffer
{
public:
                    HostBuffer  (void);
                    ~HostBuffer (void);

    // Resize to exactly 'bytes'; contents not preserved.
    void            reset       (size_t bytes);
    // Grow to at least 'bytes'; never shrinks.
    void            grow        (size_t bytes);
    void*           getPtr      (void) { return m_hostPtr; }
    size_t          getSize     (void) const { return m_bytes; }

    // NOTE(review): same ownership caveat as Buffer::setPtr above.
    void            setPtr      (void* ptr) { m_hostPtr = ptr; }

private:
    void*           m_hostPtr;  // Page-locked host pointer; NULL when empty.
    size_t          m_bytes;    // Current allocation size in bytes.
};

//------------------------------------------------------------------------
}

--------------------------------------------------------------------------------
/nvdiffrast/common/cudaraster/impl/Constants.hpp:
--------------------------------------------------------------------------------
// Copyright (c) 2009-2022, NVIDIA CORPORATION. All rights reserved.
//
// NVIDIA CORPORATION and its licensors retain all intellectual property
// and proprietary rights in and to this software, related documentation
// and any modifications thereto.
// Any use, reproduction, disclosure or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA CORPORATION is strictly prohibited.

#pragma once

//------------------------------------------------------------------------
// Compile-time tuning constants for the CUDA rasterizer. All *_LOG2 values
// are base-2 logarithms; the derived linear sizes are computed in the
// second half of this header. Do not change values here without auditing
// the kernels that depend on them (see per-line notes).

#define CR_MAXVIEWPORT_LOG2     11      // ViewportSize / PixelSize.
#define CR_SUBPIXEL_LOG2        4       // PixelSize / SubpixelSize.

#define CR_MAXBINS_LOG2         4       // ViewportSize / BinSize.
#define CR_BIN_LOG2             4       // BinSize / TileSize.
#define CR_TILE_LOG2            3       // TileSize / PixelSize.

#define CR_COVER8X8_LUT_SIZE    768     // 64-bit entries.
#define CR_FLIPBIT_FLIP_Y       2
#define CR_FLIPBIT_FLIP_X       3
#define CR_FLIPBIT_SWAP_XY      4
#define CR_FLIPBIT_COMPL        5

#define CR_BIN_STREAMS_LOG2     4
#define CR_BIN_SEG_LOG2         9       // 32-bit entries.
#define CR_TILE_SEG_LOG2        5       // 32-bit entries.

#define CR_MAXSUBTRIS_LOG2      24      // Triangle structs. Dictated by CoarseRaster.
#define CR_COARSE_QUEUE_LOG2    10      // Triangles.

// Kernel launch geometry (warps per block / blocks per SM).
#define CR_SETUP_WARPS          2
#define CR_SETUP_OPT_BLOCKS     8
#define CR_BIN_WARPS            16
#define CR_COARSE_WARPS         16      // Must be a power of two.
#define CR_FINE_MAX_WARPS       20

#define CR_EMBED_IMAGE_PARAMS   32      // Number of per-image parameter structs embedded in kernel launch parameter block.
//------------------------------------------------------------------------
// Linear sizes derived from the *_LOG2 constants above.

#define CR_MAXVIEWPORT_SIZE     (1 << CR_MAXVIEWPORT_LOG2)
#define CR_SUBPIXEL_SIZE        (1 << CR_SUBPIXEL_LOG2)
#define CR_SUBPIXEL_SQR         (1 << (CR_SUBPIXEL_LOG2 * 2))

#define CR_MAXBINS_SIZE         (1 << CR_MAXBINS_LOG2)
#define CR_MAXBINS_SQR          (1 << (CR_MAXBINS_LOG2 * 2))
#define CR_BIN_SIZE             (1 << CR_BIN_LOG2)
#define CR_BIN_SQR              (1 << (CR_BIN_LOG2 * 2))

#define CR_MAXTILES_LOG2        (CR_MAXBINS_LOG2 + CR_BIN_LOG2)
#define CR_MAXTILES_SIZE        (1 << CR_MAXTILES_LOG2)
#define CR_MAXTILES_SQR         (1 << (CR_MAXTILES_LOG2 * 2))
#define CR_TILE_SIZE            (1 << CR_TILE_LOG2)
#define CR_TILE_SQR             (1 << (CR_TILE_LOG2 * 2))

#define CR_BIN_STREAMS_SIZE     (1 << CR_BIN_STREAMS_LOG2)
#define CR_BIN_SEG_SIZE         (1 << CR_BIN_SEG_LOG2)
#define CR_TILE_SEG_SIZE        (1 << CR_TILE_SEG_LOG2)

#define CR_MAXSUBTRIS_SIZE      (1 << CR_MAXSUBTRIS_LOG2)
#define CR_COARSE_QUEUE_SIZE    (1 << CR_COARSE_QUEUE_LOG2)

//------------------------------------------------------------------------
// When evaluating interpolated Z pixel centers, we may introduce an error
// of (+-CR_LERP_ERROR) ULPs. CR_DEPTH_MIN/MAX pull the usable depth range
// in from both ends by that margin so lerp error cannot wrap.

#define CR_LERP_ERROR(SAMPLES_LOG2) (2200u << (SAMPLES_LOG2))
#define CR_DEPTH_MIN                CR_LERP_ERROR(3)
#define CR_DEPTH_MAX                (CR_U32_MAX - CR_LERP_ERROR(3))

//------------------------------------------------------------------------

--------------------------------------------------------------------------------
/nvdiffrast/common/cudaraster/impl/CudaRaster.cpp:
--------------------------------------------------------------------------------
// Copyright (c) 2009-2022, NVIDIA CORPORATION. All rights reserved.
//
// NVIDIA CORPORATION and its licensors retain all intellectual property
// and proprietary rights in and to this software, related documentation
// and any modifications thereto.
Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #include "Defs.hpp" 10 | #include "../CudaRaster.hpp" 11 | #include "RasterImpl.hpp" 12 | 13 | using namespace CR; 14 | 15 | //------------------------------------------------------------------------ 16 | // Stub interface implementation. 17 | //------------------------------------------------------------------------ 18 | 19 | CudaRaster::CudaRaster() 20 | { 21 | m_impl = new RasterImpl(); 22 | } 23 | 24 | CudaRaster::~CudaRaster() 25 | { 26 | delete m_impl; 27 | } 28 | 29 | void CudaRaster::setBufferSize(int width, int height, int numImages) 30 | { 31 | m_impl->setBufferSize(Vec3i(width, height, numImages)); 32 | } 33 | 34 | void CudaRaster::setViewport(int width, int height, int offsetX, int offsetY) 35 | { 36 | m_impl->setViewport(Vec2i(width, height), Vec2i(offsetX, offsetY)); 37 | } 38 | 39 | void CudaRaster::setRenderModeFlags(U32 flags) 40 | { 41 | m_impl->setRenderModeFlags(flags); 42 | } 43 | 44 | void CudaRaster::deferredClear(U32 clearColor) 45 | { 46 | m_impl->deferredClear(clearColor); 47 | } 48 | 49 | void CudaRaster::setVertexBuffer(void* vertices, int numVertices) 50 | { 51 | m_impl->setVertexBuffer(vertices, numVertices); 52 | } 53 | 54 | void CudaRaster::setIndexBuffer(void* indices, int numTriangles) 55 | { 56 | m_impl->setIndexBuffer(indices, numTriangles); 57 | } 58 | 59 | bool CudaRaster::drawTriangles(const int* ranges, bool peel, cudaStream_t stream) 60 | { 61 | return m_impl->drawTriangles((const Vec2i*)ranges, peel, stream); 62 | } 63 | 64 | void* CudaRaster::getColorBuffer(void) 65 | { 66 | return m_impl->getColorBuffer(); 67 | } 68 | 69 | void* CudaRaster::getDepthBuffer(void) 70 | { 71 | return m_impl->getDepthBuffer(); 72 | } 73 | 74 | void CudaRaster::swapDepthAndPeel(void) 75 | { 76 | m_impl->swapDepthAndPeel(); 77 | } 78 | 79 | 
//------------------------------------------------------------------------

--------------------------------------------------------------------------------
/nvdiffrast/common/cudaraster/impl/Defs.hpp:
--------------------------------------------------------------------------------
// Copyright (c) 2009-2022, NVIDIA CORPORATION. All rights reserved.
//
// NVIDIA CORPORATION and its licensors retain all intellectual property
// and proprietary rights in and to this software, related documentation
// and any modifications thereto. Any use, reproduction, disclosure or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA CORPORATION is strictly prohibited.

#pragma once
// NOTE(review): the two include targets below were lost in extraction
// (angle-bracket text stripped). The typedefs below require <cstdint> at
// minimum — confirm against the upstream file before building.
#include
#include

namespace CR
{
//------------------------------------------------------------------------

#ifndef NULL
#   define NULL 0
#endif

// CR_CUDA = 1 when compiled by nvcc as device-capable code, else 0.
#ifdef __CUDACC__
#   define CR_CUDA 1
#else
#   define CR_CUDA 0
#endif

// Qualifiers that let shared headers compile both as CUDA and host C++.
#if CR_CUDA
#   define CR_CUDA_FUNC     __device__ __inline__
#   define CR_CUDA_CONST    __constant__
#else
#   define CR_CUDA_FUNC     inline
#   define CR_CUDA_CONST    static const
#endif

#define CR_UNREF(X)         ((void)(X))
#define CR_ARRAY_SIZE(X)    ((int)(sizeof(X) / sizeof((X)[0])))

//------------------------------------------------------------------------
// Fixed-width scalar aliases used throughout the rasterizer.

typedef uint8_t             U8;
typedef uint16_t            U16;
typedef uint32_t            U32;
typedef uint64_t            U64;
typedef int8_t              S8;
typedef int16_t             S16;
typedef int32_t             S32;
typedef int64_t             S64;
typedef float               F32;
typedef double              F64;
typedef void                (*FuncPtr)(void);

//------------------------------------------------------------------------

#define CR_U32_MAX          (0xFFFFFFFFu)
#define CR_S32_MIN          (~0x7FFFFFFF)
#define CR_S32_MAX          (0x7FFFFFFF)
#define CR_U64_MAX          ((U64)(S64)-1)
// NOTE(review): left-shifting a negative value is undefined behavior in C++
// pre-C++20; compilers accepted here in practice, but consider INT64_MIN /
// (-CR_S64_MAX - 1) instead. Flagged only — value unchanged.
#define CR_S64_MIN          ((S64)-1 << 63)
#define CR_S64_MAX          (~((S64)-1 << 63))
#define CR_F32_MIN          (1.175494351e-38f)
#define CR_F32_MAX          (3.402823466e+38f)
#define CR_F64_MIN          (2.2250738585072014e-308)
#define CR_F64_MAX          (1.7976931348623158e+308)

//------------------------------------------------------------------------
// Misc types.

// Simple 2-component integer vector (no operators; plain aggregate-style carrier).
class Vec2i
{
public:
    Vec2i(int x_, int y_) : x(x_), y(y_) {}
    int x, y;
};

// Simple 3-component integer vector.
class Vec3i
{
public:
    Vec3i(int x_, int y_, int z_) : x(x_), y(y_), z(z_) {}
    int x, y, z;
};

//------------------------------------------------------------------------
// CUDA utilities.

#if CR_CUDA
// Flattened global thread index across a 2D grid of 2D blocks.
#   define globalThreadIdx (threadIdx.x + blockDim.x * (threadIdx.y + blockDim.y * (blockIdx.x + gridDim.x * blockIdx.y)))
#endif

//------------------------------------------------------------------------
} // namespace CR

--------------------------------------------------------------------------------
/nvdiffrast/common/cudaraster/impl/PrivateDefs.hpp:
--------------------------------------------------------------------------------
// Copyright (c) 2009-2022, NVIDIA CORPORATION. All rights reserved.
//
// NVIDIA CORPORATION and its licensors retain all intellectual property
// and proprietary rights in and to this software, related documentation
// and any modifications thereto. Any use, reproduction, disclosure or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA CORPORATION is strictly prohibited.

#pragma once
#include "Defs.hpp"
#include "Constants.hpp"

namespace CR
{
//------------------------------------------------------------------------
// Projected triangle.
//------------------------------------------------------------------------
// Shared host/device structs. These layouts are read directly by the CUDA
// kernels — do not reorder or resize fields.

struct CRTriangleHeader
{
    S16     v0x;        // Subpixels relative to viewport center. Valid if triSubtris = 1.
    S16     v0y;
    S16     v1x;
    S16     v1y;
    S16     v2x;
    S16     v2y;

    U32     misc;       // triSubtris=1: (zmin:20, f01:4, f12:4, f20:4), triSubtris>=2: (subtriBase)
};

//------------------------------------------------------------------------

struct CRTriangleData
{
    U32     zx;         // zx * sampleX + zy * sampleY + zb = lerp(CR_DEPTH_MIN, CR_DEPTH_MAX, (clipZ / clipW + 1) / 2)
    U32     zy;
    U32     zb;
    U32     id;         // Triangle id.
};

//------------------------------------------------------------------------
// Device-side structures.
//------------------------------------------------------------------------

// Per-image work counters updated atomically by the pipeline stages.
// Comments give the value each field is initialized to before a draw.
struct CRAtomics
{
    // Setup.
    S32     numSubtris;         // = numTris

    // Bin.
    S32     binCounter;         // = 0
    S32     numBinSegs;         // = 0

    // Coarse.
    S32     coarseCounter;      // = 0
    S32     numTileSegs;        // = 0
    S32     numActiveTiles;     // = 0

    // Fine.
    S32     fineCounter;        // = 0
};

//------------------------------------------------------------------------

struct CRImageParams
{
    S32     triOffset;          // First triangle index to draw.
    S32     triCount;           // Number of triangles to draw.
    S32     binBatchSize;       // Number of triangles per batch.
};

//------------------------------------------------------------------------

// Kernel launch parameter block shared by all pipeline stages.
struct CRParams
{
    // Common.

    CRAtomics*  atomics;        // Work counters. Per-image.
    S32         numImages;      // Batch size.
    S32         totalCount;     // In range mode, total number of triangles to render.
    S32         instanceMode;   // 0 = range mode, 1 = instance mode.

    S32         numVertices;    // Number of vertices in input buffer, not counting multiples in instance mode.
    S32         numTriangles;   // Number of triangles in input buffer.
    void*       vertexBuffer;   // numVertices * float4(x, y, z, w)
    void*       indexBuffer;    // numTriangles * int3(vi0, vi1, vi2)

    S32         widthPixels;    // Render buffer size in pixels. Must be multiple of tile size (8x8).
    S32         heightPixels;
    S32         widthPixelsVp;  // Viewport size in pixels.
    S32         heightPixelsVp;
    S32         widthBins;      // widthPixels / CR_BIN_SIZE
    S32         heightBins;     // heightPixels / CR_BIN_SIZE
    S32         numBins;        // widthBins * heightBins

    F32         xs;             // Vertex position adjustments for tiled rendering.
    F32         ys;
    F32         xo;
    F32         yo;

    S32         widthTiles;     // widthPixels / CR_TILE_SIZE
    S32         heightTiles;    // heightPixels / CR_TILE_SIZE
    S32         numTiles;       // widthTiles * heightTiles

    U32         renderModeFlags;
    S32         deferredClear;  // 1 = Clear framebuffer before rendering triangles.
    U32         clearColor;
    U32         clearDepth;

    // These are uniform across batch.

    S32         maxSubtris;
    S32         maxBinSegs;
    S32         maxTileSegs;

    // Setup output / bin input.

    void*       triSubtris;     // maxSubtris * U8
    void*       triHeader;      // maxSubtris * CRTriangleHeader
    void*       triData;        // maxSubtris * CRTriangleData

    // Bin output / coarse input.

    void*       binSegData;     // maxBinSegs * CR_BIN_SEG_SIZE * S32
    void*       binSegNext;     // maxBinSegs * S32
    void*       binSegCount;    // maxBinSegs * S32
    void*       binFirstSeg;    // CR_MAXBINS_SQR * CR_BIN_STREAMS_SIZE * (S32 segIdx), -1 = none
    void*       binTotal;       // CR_MAXBINS_SQR * CR_BIN_STREAMS_SIZE * (S32 numTris)

    // Coarse output / fine input.

    void*       tileSegData;    // maxTileSegs * CR_TILE_SEG_SIZE * S32
    void*       tileSegNext;    // maxTileSegs * S32
    void*       tileSegCount;   // maxTileSegs * S32
    void*       activeTiles;    // CR_MAXTILES_SQR * (S32 tileIdx)
    void*       tileFirstSeg;   // CR_MAXTILES_SQR * (S32 segIdx), -1 = none

    // Surface buffers. Outer tile offset is baked into pointers.

    void*       colorBuffer;    // sizePixels.x * sizePixels.y * numImages * U32
    void*       depthBuffer;    // sizePixels.x * sizePixels.y * numImages * U32
    void*       peelBuffer;     // sizePixels.x * sizePixels.y * numImages * U32, only if peeling enabled.
    S32         strideX;        // horizontal size in pixels
    S32         strideY;        // vertical stride in pixels

    // Per-image parameters for first images are embedded here to avoid extra memcpy for small batches.

    CRImageParams       imageParamsFirst[CR_EMBED_IMAGE_PARAMS];
    const CRImageParams* imageParamsExtra;  // After CR_EMBED_IMAGE_PARAMS.
};

//------------------------------------------------------------------------
}

--------------------------------------------------------------------------------
/nvdiffrast/common/cudaraster/impl/RasterImpl.cu:
--------------------------------------------------------------------------------
// Copyright (c) 2009-2022, NVIDIA CORPORATION. All rights reserved.
//
// NVIDIA CORPORATION and its licensors retain all intellectual property
// and proprietary rights in and to this software, related documentation
// and any modifications thereto. Any use, reproduction, disclosure or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA CORPORATION is strictly prohibited.
#include "../CudaRaster.hpp"
#include "PrivateDefs.hpp"
#include "Constants.hpp"
#include "Util.inl"

namespace CR
{

//------------------------------------------------------------------------
// Stage implementations.
// The .inl files are textually included INSIDE namespace CR on purpose:
// each defines the device-side body for one pipeline stage.
//------------------------------------------------------------------------

#include "TriangleSetup.inl"
#include "BinRaster.inl"
#include "CoarseRaster.inl"
#include "FineRaster.inl"

}

//------------------------------------------------------------------------
// Stage entry points.
// Thin __global__ wrappers; launch geometry is pinned via __launch_bounds__
// to the warp counts declared in Constants.hpp so register allocation
// matches the occupancy the host-side launcher assumes.
//------------------------------------------------------------------------

__global__ void __launch_bounds__(CR_SETUP_WARPS * 32, CR_SETUP_OPT_BLOCKS) triangleSetupKernel (const CR::CRParams p) { CR::triangleSetupImpl(p); }
__global__ void __launch_bounds__(CR_BIN_WARPS * 32, 1)                     binRasterKernel     (const CR::CRParams p) { CR::binRasterImpl(p); }
__global__ void __launch_bounds__(CR_COARSE_WARPS * 32, 1)                  coarseRasterKernel  (const CR::CRParams p) { CR::coarseRasterImpl(p); }
__global__ void __launch_bounds__(CR_FINE_MAX_WARPS * 32, 1)                fineRasterKernel    (const CR::CRParams p) { CR::fineRasterImpl(p); }

//------------------------------------------------------------------------

--------------------------------------------------------------------------------
/nvdiffrast/common/cudaraster/impl/RasterImpl.hpp:
--------------------------------------------------------------------------------
// Copyright (c) 2009-2022, NVIDIA CORPORATION. All rights reserved.
//
// NVIDIA CORPORATION and its licensors retain all intellectual property
// and proprietary rights in and to this software, related documentation
// and any modifications thereto.
// Any use, reproduction, disclosure or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA CORPORATION is strictly prohibited.

#pragma once
#include "PrivateDefs.hpp"
#include "Buffer.hpp"
#include "../CudaRaster.hpp"

namespace CR
{
//------------------------------------------------------------------------
// Concrete rasterizer implementation behind the public CudaRaster facade.
// Owns all GPU-side state: render targets, intermediate stage buffers,
// and the launch configuration.

class RasterImpl
{
public:
                    RasterImpl              (void);
                    ~RasterImpl             (void);

    void            setBufferSize           (Vec3i size);
    void            setViewport             (Vec2i size, Vec2i offset);
    void            setRenderModeFlags      (U32 flags) { m_renderModeFlags = flags; }
    void            deferredClear           (U32 color) { m_deferredClear = true; m_clearColor = color; }
    void            setVertexBuffer         (void* ptr, int numVertices) { m_vertexPtr = ptr; m_numVertices = numVertices; } // GPU pointer.
    void            setIndexBuffer          (void* ptr, int numTriangles) { m_indexPtr = ptr; m_numTriangles = numTriangles; } // GPU pointer.
    bool            drawTriangles           (const Vec2i* ranges, bool peel, cudaStream_t stream);
    void*           getColorBuffer          (void) { return m_colorBuffer.getPtr(); } // GPU pointer.
    void*           getDepthBuffer          (void) { return m_depthBuffer.getPtr(); } // GPU pointer.
    void            swapDepthAndPeel        (void);
    size_t          getTotalBufferSizes     (void) const;

private:
    void            launchStages            (bool instanceMode, bool peel, cudaStream_t stream);

    // State.

    unsigned int    m_renderModeFlags;
    bool            m_deferredClear;    // True => clear framebuffer before next draw.
    unsigned int    m_clearColor;
    void*           m_vertexPtr;        // Caller-owned GPU pointer.
    void*           m_indexPtr;         // Caller-owned GPU pointer.
    int             m_numVertices;      // Input buffer size.
    int             m_numTriangles;     // Input buffer size.
    size_t          m_bufferSizesReported; // Previously reported buffer sizes.

    // Surfaces.

    Buffer          m_colorBuffer;
    Buffer          m_depthBuffer;
    Buffer          m_peelBuffer;
    int             m_numImages;
    Vec2i           m_bufferSizePixels; // Internal buffer size.
    Vec2i           m_bufferSizeVp;     // Total viewport size.
    Vec2i           m_sizePixels;       // Internal size at which all computation is done, buffers reserved, etc.
    Vec2i           m_sizeVp;           // Size to which output will be cropped outside, determines viewport size.
    Vec2i           m_offsetPixels;     // Viewport offset for tiled rendering.
    Vec2i           m_sizeBins;
    S32             m_numBins;
    Vec2i           m_sizeTiles;
    S32             m_numTiles;

    // Launch sizes etc.

    S32             m_numSMs;
    S32             m_numCoarseBlocksPerSM;
    S32             m_numFineBlocksPerSM;
    S32             m_numFineWarpsPerBlock;

    // Global intermediate buffers. Individual images have offsets to these.

    Buffer          m_crAtomics;
    HostBuffer      m_crAtomicsHost;
    HostBuffer      m_crImageParamsHost;
    Buffer          m_crImageParamsExtra;
    Buffer          m_triSubtris;
    Buffer          m_triHeader;
    Buffer          m_triData;
    Buffer          m_binFirstSeg;
    Buffer          m_binTotal;
    Buffer          m_binSegData;
    Buffer          m_binSegNext;
    Buffer          m_binSegCount;
    Buffer          m_activeTiles;
    Buffer          m_tileFirstSeg;
    Buffer          m_tileSegData;
    Buffer          m_tileSegNext;
    Buffer          m_tileSegCount;

    // Actual buffer sizes.

    S32             m_maxSubtris;
    S32             m_maxBinSegs;
    S32             m_maxTileSegs;
};

//------------------------------------------------------------------------
} // namespace CR

--------------------------------------------------------------------------------
/nvdiffrast/common/framework.h:
--------------------------------------------------------------------------------
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
//
// NVIDIA CORPORATION and its licensors retain all intellectual property
// and proprietary rights in and to this software, related documentation
// and any modifications thereto.
// Any use, reproduction, disclosure or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA CORPORATION is strictly prohibited.

#pragma once

// Framework-specific macros to enable code sharing.
// Exactly one of NVDR_TENSORFLOW / NVDR_TORCH is expected to be defined by
// the build; each section maps the shared NVDR_* macros onto that
// framework's context and error-reporting primitives.

//------------------------------------------------------------------------
// Tensorflow.

#ifdef NVDR_TENSORFLOW
#define EIGEN_USE_GPU
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/platform/default/logging.h"
using namespace tensorflow;
using namespace tensorflow::shape_inference;
#define NVDR_CTX_ARGS OpKernelContext* _nvdr_ctx
#define NVDR_CTX_PARAMS _nvdr_ctx
#define NVDR_CHECK(COND, ERR) OP_REQUIRES(_nvdr_ctx, COND, errors::Internal(ERR))
#define NVDR_CHECK_CUDA_ERROR(CUDA_CALL) OP_CHECK_CUDA_ERROR(_nvdr_ctx, CUDA_CALL)
#define NVDR_CHECK_GL_ERROR(GL_CALL) OP_CHECK_GL_ERROR(_nvdr_ctx, GL_CALL)
#endif

//------------------------------------------------------------------------
// PyTorch.

#ifdef NVDR_TORCH
// Host-only headers; skipped under nvcc device compilation.
// NOTE(review): the five include targets below were lost in extraction
// (angle-bracket text stripped) — presumably the torch extension headers;
// confirm against the upstream file before building.
#ifndef __CUDACC__
#include
#include
#include
#include
#include
#endif
// Torch ops carry no per-op context object, so the context macros are dummies.
#define NVDR_CTX_ARGS int _nvdr_ctx_dummy
#define NVDR_CTX_PARAMS 0
#define NVDR_CHECK(COND, ERR) do { TORCH_CHECK(COND, ERR) } while(0)
#define NVDR_CHECK_CUDA_ERROR(CUDA_CALL) do { cudaError_t err = CUDA_CALL; TORCH_CHECK(!err, "Cuda error: ", cudaGetLastError(), "[", #CUDA_CALL, ";]"); } while(0)
#define NVDR_CHECK_GL_ERROR(GL_CALL) do { GL_CALL; GLenum err = glGetError(); TORCH_CHECK(err == GL_NO_ERROR, "OpenGL error: ", getGLErrorString(err), "[", #GL_CALL, ";]"); } while(0)
#endif

//------------------------------------------------------------------------

--------------------------------------------------------------------------------
/nvdiffrast/common/glutil.h:
--------------------------------------------------------------------------------
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
//
// NVIDIA CORPORATION and its licensors retain all intellectual property
// and proprietary rights in and to this software, related documentation
// and any modifications thereto. Any use, reproduction, disclosure or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA CORPORATION is strictly prohibited.

#pragma once

//------------------------------------------------------------------------
// Windows-specific headers and types.
//------------------------------------------------------------------------

#ifdef _WIN32
#define NOMINMAX
// NOTE(review): include target lost in extraction — presumably <windows.h>
// given the comment; confirm against the upstream file.
#include // Required by gl.h in Windows.
#define GLAPIENTRY APIENTRY

// Per-platform GL context handle: WGL on Windows (device context + GL render
// context), EGL on Linux (display + context).
struct GLContext
{
    HDC     hdc;
    HGLRC   hglrc;
    int     extInitialized; // Nonzero once extension function pointers are resolved.
};

#endif // _WIN32

//------------------------------------------------------------------------
// Linux-specific headers and types.
//------------------------------------------------------------------------

#ifdef __linux__
#define EGL_NO_X11 // X11/Xlib.h has "#define Status int" which breaks Tensorflow. Avoid it.
#define MESA_EGL_NO_X11_HEADERS
// NOTE(review): include targets lost in extraction — presumably the EGL
// headers (<EGL/egl.h>, <EGL/eglext.h>); confirm against the upstream file.
#include
#include
#define GLAPIENTRY

struct GLContext
{
    EGLDisplay  display;
    EGLContext  context;
    int         extInitialized; // Nonzero once extension function pointers are resolved.
};

#endif // __linux__

//------------------------------------------------------------------------
// OpenGL, CUDA interop, GL extensions.
//------------------------------------------------------------------------
#define GL_GLEXT_LEGACY
// NOTE(review): include targets lost in extraction — confirm against the
// upstream file.
#include
#include

// Constants.
// Fallback definitions of GL enums for headers older than the listed
// version/extension; values are the standard Khronos-assigned ones.
#ifndef GL_VERSION_1_2
#define GL_CLAMP_TO_EDGE                    0x812F
#define GL_TEXTURE_3D                       0x806F
#endif
#ifndef GL_VERSION_1_5
#define GL_ARRAY_BUFFER                     0x8892
#define GL_DYNAMIC_DRAW                     0x88E8
#define GL_ELEMENT_ARRAY_BUFFER             0x8893
#endif
#ifndef GL_VERSION_2_0
#define GL_FRAGMENT_SHADER                  0x8B30
#define GL_INFO_LOG_LENGTH                  0x8B84
#define GL_LINK_STATUS                      0x8B82
#define GL_VERTEX_SHADER                    0x8B31
#endif
#ifndef GL_VERSION_3_0
#define GL_MAJOR_VERSION                    0x821B
#define GL_MINOR_VERSION                    0x821C
#define GL_RGBA32F                          0x8814
#define GL_TEXTURE_2D_ARRAY                 0x8C1A
#endif
#ifndef GL_VERSION_3_2
#define GL_GEOMETRY_SHADER                  0x8DD9
#endif
#ifndef GL_ARB_framebuffer_object
#define GL_COLOR_ATTACHMENT0                0x8CE0
#define GL_COLOR_ATTACHMENT1                0x8CE1
#define GL_DEPTH_STENCIL                    0x84F9
#define GL_DEPTH_STENCIL_ATTACHMENT         0x821A
#define GL_DEPTH24_STENCIL8                 0x88F0
#define GL_FRAMEBUFFER                      0x8D40
#define GL_INVALID_FRAMEBUFFER_OPERATION    0x0506
#define GL_UNSIGNED_INT_24_8                0x84FA
#endif
#ifndef GL_ARB_imaging
#define GL_TABLE_TOO_LARGE                  0x8031
#endif
#ifndef GL_KHR_robustness
#define GL_CONTEXT_LOST                     0x0507
#endif

// Declare function pointers to OpenGL extension functions.
// glutil_extlist.h expands one GLUTIL_EXT(...) per extension entry point;
// here each expands to an extern function-pointer declaration.
#define GLUTIL_EXT(return_type, name, ...) extern return_type (GLAPIENTRY* name)(__VA_ARGS__);
#include "glutil_extlist.h"
#undef GLUTIL_EXT

//------------------------------------------------------------------------
// Common functions.
//------------------------------------------------------------------------

void        setGLContext        (GLContext& glctx);
void        releaseGLContext    (void);
GLContext   createGLContext     (int cudaDeviceIdx);
void        destroyGLContext    (GLContext& glctx);
const char* getGLErrorString    (GLenum err);

//------------------------------------------------------------------------

--------------------------------------------------------------------------------
/nvdiffrast/common/glutil_extlist.h:
--------------------------------------------------------------------------------
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
//
// NVIDIA CORPORATION and its licensors retain all intellectual property
// and proprietary rights in and to this software, related documentation
// and any modifications thereto. Any use, reproduction, disclosure or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA CORPORATION is strictly prohibited.
8 | 9 | #ifndef GL_VERSION_1_2 10 | GLUTIL_EXT(void, glTexImage3D, GLenum target, GLint level, GLint internalFormat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLenum format, GLenum type, const void *pixels); 11 | #endif 12 | #ifndef GL_VERSION_1_5 13 | GLUTIL_EXT(void, glBindBuffer, GLenum target, GLuint buffer); 14 | GLUTIL_EXT(void, glBufferData, GLenum target, ptrdiff_t size, const void* data, GLenum usage); 15 | GLUTIL_EXT(void, glGenBuffers, GLsizei n, GLuint* buffers); 16 | #endif 17 | #ifndef GL_VERSION_2_0 18 | GLUTIL_EXT(void, glAttachShader, GLuint program, GLuint shader); 19 | GLUTIL_EXT(void, glCompileShader, GLuint shader); 20 | GLUTIL_EXT(GLuint, glCreateProgram, void); 21 | GLUTIL_EXT(GLuint, glCreateShader, GLenum type); 22 | GLUTIL_EXT(void, glDrawBuffers, GLsizei n, const GLenum* bufs); 23 | GLUTIL_EXT(void, glEnableVertexAttribArray, GLuint index); 24 | GLUTIL_EXT(void, glGetProgramInfoLog, GLuint program, GLsizei bufSize, GLsizei* length, char* infoLog); 25 | GLUTIL_EXT(void, glGetProgramiv, GLuint program, GLenum pname, GLint* param); 26 | GLUTIL_EXT(void, glLinkProgram, GLuint program); 27 | GLUTIL_EXT(void, glShaderSource, GLuint shader, GLsizei count, const char *const* string, const GLint* length); 28 | GLUTIL_EXT(void, glUniform1f, GLint location, GLfloat v0); 29 | GLUTIL_EXT(void, glUniform2f, GLint location, GLfloat v0, GLfloat v1); 30 | GLUTIL_EXT(void, glUseProgram, GLuint program); 31 | GLUTIL_EXT(void, glVertexAttribPointer, GLuint index, GLint size, GLenum type, GLboolean normalized, GLsizei stride, const void* pointer); 32 | #endif 33 | #ifndef GL_VERSION_3_2 34 | GLUTIL_EXT(void, glFramebufferTexture, GLenum target, GLenum attachment, GLuint texture, GLint level); 35 | #endif 36 | #ifndef GL_ARB_framebuffer_object 37 | GLUTIL_EXT(void, glBindFramebuffer, GLenum target, GLuint framebuffer); 38 | GLUTIL_EXT(void, glGenFramebuffers, GLsizei n, GLuint* framebuffers); 39 | #endif 40 | #ifndef 
//------------------------------------------------------------------------
// Forward kernel.
//
// Interpolates per-vertex attributes across the pixels covered by each
// triangle, using the barycentrics produced by the rasterizer. Optionally
// (ENABLE_DA) also emits per-attribute screen-space differentials for the
// attributes selected in p.diffAttrs.
//
// NOTE(review): the template parameter list and the explicit template
// arguments in the specializations below were lost during text extraction
// (bare "template" and "InterpolateFwdKernelTemplate(p)"); restored here as
// <bool ENABLE_DA> with <false>/<true> instantiations, matching how the
// kernel body branches on ENABLE_DA.

template <bool ENABLE_DA>
static __forceinline__ __device__ void InterpolateFwdKernelTemplate(const InterpolateKernelParams p)
{
    // Calculate pixel position.
    int px = blockIdx.x * blockDim.x + threadIdx.x;
    int py = blockIdx.y * blockDim.y + threadIdx.y;
    int pz = blockIdx.z;
    if (px >= p.width || py >= p.height || pz >= p.depth)
        return;

    // Pixel index.
    int pidx = px + p.width * (py + p.height * pz);

    // Output ptrs.
    float* out = p.out + pidx * p.numAttr;
    float2* outDA = ENABLE_DA ? (((float2*)p.outDA) + pidx * p.numDiffAttr) : 0;

    // Fetch rasterizer output. r.w encodes triangle index + 1; 0 means background.
    float4 r = ((float4*)p.rast)[pidx];
    int triIdx = float_to_triidx(r.w) - 1;
    bool triValid = (triIdx >= 0 && triIdx < p.numTriangles);

    // If no geometry in entire warp, zero the output and exit.
    // Otherwise force barys to zero and output with live threads.
    // The full-warp mask is safe here: the early bounds check above returns
    // before any sync only for threads outside the image, and the launch is
    // sized so whole warps exit together — assumes block width is a multiple
    // of warp-coherent tiling (TODO confirm against launch code).
    if (__all_sync(0xffffffffu, !triValid))
    {
        for (int i=0; i < p.numAttr; i++)
            out[i] = 0.f;
        if (ENABLE_DA)
            for (int i=0; i < p.numDiffAttr; i++)
                outDA[i] = make_float2(0.f, 0.f);
        return;
    }

    // Fetch vertex indices. Invalid triangles read index 0 harmlessly.
    int vi0 = triValid ? p.tri[triIdx * 3 + 0] : 0;
    int vi1 = triValid ? p.tri[triIdx * 3 + 1] : 0;
    int vi2 = triValid ? p.tri[triIdx * 3 + 2] : 0;

    // Bail out if corrupt indices.
    if (vi0 < 0 || vi0 >= p.numVertices ||
        vi1 < 0 || vi1 >= p.numVertices ||
        vi2 < 0 || vi2 >= p.numVertices)
        return;

    // In instance mode, adjust vertex indices by minibatch index unless broadcasting.
    if (p.instance_mode && !p.attrBC)
    {
        vi0 += pz * p.numVertices;
        vi1 += pz * p.numVertices;
        vi2 += pz * p.numVertices;
    }

    // Pointers to attributes.
    const float* a0 = p.attr + vi0 * p.numAttr;
    const float* a1 = p.attr + vi1 * p.numAttr;
    const float* a2 = p.attr + vi2 * p.numAttr;

    // Barys. If no triangle, force all to zero -> output is zero.
    float b0 = triValid ? r.x : 0.f;
    float b1 = triValid ? r.y : 0.f;
    float b2 = triValid ? (1.f - r.x - r.y) : 0.f;

    // Interpolate and write attributes.
    for (int i=0; i < p.numAttr; i++)
        out[i] = b0*a0[i] + b1*a1[i] + b2*a2[i];

    // No diff attrs? Exit.
    if (!ENABLE_DA)
        return;

    // Read bary pixel differentials if we have a triangle.
    float4 db = make_float4(0.f, 0.f, 0.f, 0.f);
    if (triValid)
        db = ((float4*)p.rastDB)[pidx];

    // Unpack a bit: d(u,v)/d(x,y) of the barycentrics.
    float dudx = db.x;
    float dudy = db.y;
    float dvdx = db.z;
    float dvdy = db.w;

    // Calculate the pixel differentials of chosen attributes via chain rule:
    // ds/dx = (ds/du)(du/dx) + (ds/dv)(dv/dx), with s linear in (u, v).
    for (int i=0; i < p.numDiffAttr; i++)
    {
        // Input attribute index.
        int j = p.diff_attrs_all ? i : p.diffAttrs[i];
        if (j < 0)
            j += p.numAttr; // Python-style negative indices.

        // Zero output if invalid index.
        float dsdx = 0.f;
        float dsdy = 0.f;
        if (j >= 0 && j < p.numAttr)
        {
            float s0 = a0[j];
            float s1 = a1[j];
            float s2 = a2[j];
            float dsdu = s0 - s2;
            float dsdv = s1 - s2;
            dsdx = dudx*dsdu + dvdx*dsdv;
            dsdy = dudy*dsdu + dvdy*dsdv;
        }

        // Write.
        outDA[i] = make_float2(dsdx, dsdy);
    }
}

// Template specializations.
__global__ void InterpolateFwdKernel  (const InterpolateKernelParams p) { InterpolateFwdKernelTemplate<false>(p); }
__global__ void InterpolateFwdKernelDa(const InterpolateKernelParams p) { InterpolateFwdKernelTemplate<true>(p); }
//------------------------------------------------------------------------
// Gradient kernel.
//
// Backward pass of attribute interpolation. Scatters gradients to the
// attribute buffer (via coalesced atomics) and writes per-pixel gradients
// w.r.t. the rasterizer outputs (barycentrics and, with ENABLE_DA, their
// screen-space differentials).
//
// NOTE(review): template parameter list and explicit specialization
// arguments restored (<bool ENABLE_DA>, <false>/<true>) — they were stripped
// during text extraction.

template <bool ENABLE_DA>
static __forceinline__ __device__ void InterpolateGradKernelTemplate(const InterpolateKernelParams p)
{
    // Temporary space for coalesced atomics.
    CA_DECLARE_TEMP(IP_GRAD_MAX_KERNEL_BLOCK_WIDTH * IP_GRAD_MAX_KERNEL_BLOCK_HEIGHT);

    // Calculate pixel position.
    int px = blockIdx.x * blockDim.x + threadIdx.x;
    int py = blockIdx.y * blockDim.y + threadIdx.y;
    int pz = blockIdx.z;
    if (px >= p.width || py >= p.height || pz >= p.depth)
        return;

    // Pixel index.
    int pidx = px + p.width * (py + p.height * pz);

    // Fetch triangle ID. If none, output zero bary/db gradients and exit.
    float4 r = ((float4*)p.rast)[pidx];
    int triIdx = float_to_triidx(r.w) - 1;
    if (triIdx < 0 || triIdx >= p.numTriangles)
    {
        ((float4*)p.gradRaster)[pidx] = make_float4(0.f, 0.f, 0.f, 0.f);
        if (ENABLE_DA)
            ((float4*)p.gradRasterDB)[pidx] = make_float4(0.f, 0.f, 0.f, 0.f);
        return;
    }

    // Fetch vertex indices.
    int vi0 = p.tri[triIdx * 3 + 0];
    int vi1 = p.tri[triIdx * 3 + 1];
    int vi2 = p.tri[triIdx * 3 + 2];

    // Bail out if corrupt indices.
    if (vi0 < 0 || vi0 >= p.numVertices ||
        vi1 < 0 || vi1 >= p.numVertices ||
        vi2 < 0 || vi2 >= p.numVertices)
        return;

    // In instance mode, adjust vertex indices by minibatch index unless broadcasting.
    if (p.instance_mode && !p.attrBC)
    {
        vi0 += pz * p.numVertices;
        vi1 += pz * p.numVertices;
        vi2 += pz * p.numVertices;
    }

    // Initialize coalesced atomics. Group by triangle so threads shading the
    // same triangle combine their atomics.
    CA_SET_GROUP(triIdx);

    // Pointers to inputs.
    const float* a0 = p.attr + vi0 * p.numAttr;
    const float* a1 = p.attr + vi1 * p.numAttr;
    const float* a2 = p.attr + vi2 * p.numAttr;
    const float* pdy = p.dy + pidx * p.numAttr;

    // Pointers to outputs.
    float* ga0 = p.gradAttr + vi0 * p.numAttr;
    float* ga1 = p.gradAttr + vi1 * p.numAttr;
    float* ga2 = p.gradAttr + vi2 * p.numAttr;

    // Barys and bary gradient accumulators.
    float b0 = r.x;
    float b1 = r.y;
    float b2 = 1.f - r.x - r.y;
    float gb0 = 0.f;
    float gb1 = 0.f;

    // Loop over attributes and accumulate attribute gradients.
    // out = b0*s0 + b1*s1 + (1-b0-b1)*s2, so d(out)/db0 = s0-s2, d(out)/dsk = bk.
    for (int i=0; i < p.numAttr; i++)
    {
        float y = pdy[i];
        float s0 = a0[i];
        float s1 = a1[i];
        float s2 = a2[i];
        gb0 += y * (s0 - s2);
        gb1 += y * (s1 - s2);
        caAtomicAdd(ga0 + i, b0 * y);
        caAtomicAdd(ga1 + i, b1 * y);
        caAtomicAdd(ga2 + i, b2 * y);
    }

    // Write the bary gradients.
    ((float4*)p.gradRaster)[pidx] = make_float4(gb0, gb1, 0.f, 0.f);

    // If pixel differentials disabled, we're done.
    if (!ENABLE_DA)
        return;

    // Calculate gradients based on attribute pixel differentials.
    const float2* dda = ((float2*)p.dda) + pidx * p.numDiffAttr;
    float gdudx = 0.f;
    float gdudy = 0.f;
    float gdvdx = 0.f;
    float gdvdy = 0.f;

    // Read bary pixel differentials.
    float4 db = ((float4*)p.rastDB)[pidx];
    float dudx = db.x;
    float dudy = db.y;
    float dvdx = db.z;
    float dvdy = db.w;

    for (int i=0; i < p.numDiffAttr; i++)
    {
        // Input attribute index.
        int j = p.diff_attrs_all ? i : p.diffAttrs[i];
        if (j < 0)
            j += p.numAttr; // Python-style negative indices.

        // Check that index is valid.
        if (j >= 0 && j < p.numAttr)
        {
            float2 dsdxy = dda[i];
            float dsdx = dsdxy.x;
            float dsdy = dsdxy.y;

            float s0 = a0[j];
            float s1 = a1[j];
            float s2 = a2[j];

            // Gradients of db.
            float dsdu = s0 - s2;
            float dsdv = s1 - s2;
            gdudx += dsdu * dsdx;
            gdudy += dsdu * dsdy;
            gdvdx += dsdv * dsdx;
            gdvdy += dsdv * dsdy;

            // Gradients of attributes.
            float du = dsdx*dudx + dsdy*dudy;
            float dv = dsdx*dvdx + dsdy*dvdy;
            caAtomicAdd(ga0 + j, du);
            caAtomicAdd(ga1 + j, dv);
            caAtomicAdd(ga2 + j, -du - dv);
        }
    }

    // Write.
    ((float4*)p.gradRasterDB)[pidx] = make_float4(gdudx, gdudy, gdvdx, gdvdy);
}

// Template specializations.
__global__ void InterpolateGradKernel  (const InterpolateKernelParams p) { InterpolateGradKernelTemplate<false>(p); }
__global__ void InterpolateGradKernelDa(const InterpolateKernelParams p) { InterpolateGradKernelTemplate<true>(p); }
31 | float* out; // Outgoing interpolated attributes. 32 | float* outDA; // Outgoing texcoord major axis lengths. 33 | float* gradAttr; // Outgoing attribute gradients. 34 | float* gradRaster; // Outgoing rasterizer gradients. 35 | float* gradRasterDB; // Outgoing rasterizer bary diff gradients. 36 | int numTriangles; // Number of triangles. 37 | int numVertices; // Number of vertices. 38 | int numAttr; // Number of total vertex attributes. 39 | int numDiffAttr; // Number of attributes to differentiate. 40 | int width; // Image width. 41 | int height; // Image height. 42 | int depth; // Minibatch size. 43 | int attrBC; // 0=normal, 1=attr is broadcast. 44 | int instance_mode; // 0=normal, 1=instance mode. 45 | int diff_attrs_all; // 0=normal, 1=produce pixel differentials for all attributes. 46 | int diffAttrs[IP_MAX_DIFF_ATTRS]; // List of attributes to differentiate. 47 | }; 48 | 49 | //------------------------------------------------------------------------ 50 | -------------------------------------------------------------------------------- /nvdiffrast/common/rasterize.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #include "common.h" 10 | #include "rasterize.h" 11 | 12 | //------------------------------------------------------------------------ 13 | // Cuda forward rasterizer pixel shader kernel. 14 | 15 | __global__ void RasterizeCudaFwdShaderKernel(const RasterizeCudaFwdShaderParams p) 16 | { 17 | // Calculate pixel position. 
//------------------------------------------------------------------------
// Cuda forward rasterizer pixel shader kernel.
//
// Consumes the per-pixel triangle index produced by the coverage rasterizer
// (p.in_idx) and, for each covered pixel, evaluates perspective-correct
// barycentrics, depth (z/w), and the screen-space differentials of the
// barycentrics. Writes a float4 (u, v, z/w, packed triIdx+1) to p.out and a
// float4 (dudx, dudy, dvdx, dvdy) to p.out_db.

__global__ void RasterizeCudaFwdShaderKernel(const RasterizeCudaFwdShaderParams p)
{
    // Calculate pixel position.
    int px = blockIdx.x * blockDim.x + threadIdx.x;
    int py = blockIdx.y * blockDim.y + threadIdx.y;
    int pz = blockIdx.z;
    if (px >= p.width_out || py >= p.height_out || pz >= p.depth)
        return;

    // Pixel indices. Input and output buffers may have different resolutions.
    int pidx_in  = px + p.width_in  * (py + p.height_in  * pz);
    int pidx_out = px + p.width_out * (py + p.height_out * pz);

    // Fetch triangle idx. in_idx stores triIdx+1; 0 means background.
    int triIdx = p.in_idx[pidx_in] - 1;
    if (triIdx < 0 || triIdx >= p.numTriangles)
    {
        // No or corrupt triangle.
        ((float4*)p.out)[pidx_out] = make_float4(0.0, 0.0, 0.0, 0.0); // Clear out.
        ((float4*)p.out_db)[pidx_out] = make_float4(0.0, 0.0, 0.0, 0.0); // Clear out_db.
        return;
    }

    // Fetch vertex indices.
    int vi0 = p.tri[triIdx * 3 + 0];
    int vi1 = p.tri[triIdx * 3 + 1];
    int vi2 = p.tri[triIdx * 3 + 2];

    // Bail out if vertex indices are corrupt.
    if (vi0 < 0 || vi0 >= p.numVertices ||
        vi1 < 0 || vi1 >= p.numVertices ||
        vi2 < 0 || vi2 >= p.numVertices)
        return;

    // In instance mode, adjust vertex indices by minibatch index.
    if (p.instance_mode)
    {
        vi0 += pz * p.numVertices;
        vi1 += pz * p.numVertices;
        vi2 += pz * p.numVertices;
    }

    // Fetch vertex positions (clip-space x, y, z, w).
    float4 p0 = ((float4*)p.pos)[vi0];
    float4 p1 = ((float4*)p.pos)[vi1];
    float4 p2 = ((float4*)p.pos)[vi2];

    // Evaluate edge functions. (fx, fy) is the pixel center mapped to clip
    // space via the (xs, xo, ys, yo) transform; vertices are shifted so the
    // pixel sits at the origin, making the signed sub-triangle areas a0..a2
    // direct edge-function values.
    float fx = p.xs * (float)px + p.xo;
    float fy = p.ys * (float)py + p.yo;
    float p0x = p0.x - fx * p0.w;
    float p0y = p0.y - fy * p0.w;
    float p1x = p1.x - fx * p1.w;
    float p1y = p1.y - fy * p1.w;
    float p2x = p2.x - fx * p2.w;
    float p2y = p2.y - fy * p2.w;
    float a0 = p1x*p2y - p1y*p2x;
    float a1 = p2x*p0y - p2y*p0x;
    float a2 = p0x*p1y - p0y*p1x;

    // Perspective correct, normalized barycentrics.
    // NOTE(review): no epsilon here (unlike the gradient kernel below);
    // degenerate triangles can yield Inf/NaN that the clamps then absorb.
    float iw = 1.f / (a0 + a1 + a2);
    float b0 = a0 * iw;
    float b1 = a1 * iw;

    // Compute z/w for depth buffer.
    float z = p0.z * a0 + p1.z * a1 + p2.z * a2;
    float w = p0.w * a0 + p1.w * a1 + p2.w * a2;
    float zw = z / w;

    // Clamps to avoid NaNs.
    b0 = __saturatef(b0); // Clamp to [+0.0, 1.0].
    b1 = __saturatef(b1); // Clamp to [+0.0, 1.0].
    zw = fmaxf(fminf(zw, 1.f), -1.f);

    // Emit output. Triangle index is stored as triIdx+1 so 0 = background.
    ((float4*)p.out)[pidx_out] = make_float4(b0, b1, zw, triidx_to_float(triIdx + 1));

    // Calculate bary pixel differentials via the quotient rule on
    // u = a0/at, v = a1/at with at = a0+a1+a2.
    float dfxdx = p.xs * iw;
    float dfydy = p.ys * iw;
    float da0dx = p2.y*p1.w - p1.y*p2.w;
    float da0dy = p1.x*p2.w - p2.x*p1.w;
    float da1dx = p0.y*p2.w - p2.y*p0.w;
    float da1dy = p2.x*p0.w - p0.x*p2.w;
    float da2dx = p1.y*p0.w - p0.y*p1.w;
    float da2dy = p0.x*p1.w - p1.x*p0.w;
    float datdx = da0dx + da1dx + da2dx;
    float datdy = da0dy + da1dy + da2dy;
    float dudx = dfxdx * (b0 * datdx - da0dx);
    float dudy = dfydy * (b0 * datdy - da0dy);
    float dvdx = dfxdx * (b1 * datdx - da1dx);
    float dvdy = dfydy * (b1 * datdy - da1dy);

    // Emit bary pixel differentials.
    ((float4*)p.out_db)[pidx_out] = make_float4(dudx, dudy, dvdx, dvdy);
}
//------------------------------------------------------------------------
// Gradient Cuda kernel.
//
// Backward pass of rasterization: propagates per-pixel gradients of the
// barycentrics (p.dy) and, with ENABLE_DB, of the bary pixel differentials
// (p.ddb), into gradients of the clip-space vertex positions (p.grad).
//
// NOTE(review): template parameter list and explicit specialization
// arguments restored (<bool ENABLE_DB>, <false>/<true>) — they were stripped
// during text extraction.

template <bool ENABLE_DB>
static __forceinline__ __device__ void RasterizeGradKernelTemplate(const RasterizeGradParams p)
{
    // Temporary space for coalesced atomics.
    CA_DECLARE_TEMP(RAST_GRAD_MAX_KERNEL_BLOCK_WIDTH * RAST_GRAD_MAX_KERNEL_BLOCK_HEIGHT);

    // Calculate pixel position.
    int px = blockIdx.x * blockDim.x + threadIdx.x;
    int py = blockIdx.y * blockDim.y + threadIdx.y;
    int pz = blockIdx.z;
    if (px >= p.width || py >= p.height || pz >= p.depth)
        return;

    // Pixel index.
    int pidx = px + p.width * (py + p.height * pz);

    // Read triangle idx and dy.
    float2 dy  = ((float2*)p.dy)[pidx * 2];
    float4 ddb = ENABLE_DB ? ((float4*)p.ddb)[pidx] : make_float4(0.f, 0.f, 0.f, 0.f);
    int triIdx = float_to_triidx(((float*)p.out)[pidx * 4 + 3]) - 1;

    // Exit if nothing to do.
    if (triIdx < 0 || triIdx >= p.numTriangles)
        return; // No or corrupt triangle.
    int grad_all_dy = __float_as_int(dy.x) | __float_as_int(dy.y); // Bitwise OR of all incoming gradients.
    int grad_all_ddb = 0;
    if (ENABLE_DB)
        grad_all_ddb = __float_as_int(ddb.x) | __float_as_int(ddb.y) | __float_as_int(ddb.z) | __float_as_int(ddb.w);
    // Shift left by one to drop the sign bit so both +0.0 and -0.0 count as zero.
    if (((grad_all_dy | grad_all_ddb) << 1) == 0)
        return; // All incoming gradients are +0/-0.

    // Fetch vertex indices.
    int vi0 = p.tri[triIdx * 3 + 0];
    int vi1 = p.tri[triIdx * 3 + 1];
    int vi2 = p.tri[triIdx * 3 + 2];

    // Bail out if vertex indices are corrupt.
    if (vi0 < 0 || vi0 >= p.numVertices ||
        vi1 < 0 || vi1 >= p.numVertices ||
        vi2 < 0 || vi2 >= p.numVertices)
        return;

    // In instance mode, adjust vertex indices by minibatch index.
    if (p.instance_mode)
    {
        vi0 += pz * p.numVertices;
        vi1 += pz * p.numVertices;
        vi2 += pz * p.numVertices;
    }

    // Initialize coalesced atomics.
    CA_SET_GROUP(triIdx);

    // Fetch vertex positions.
    float4 p0 = ((float4*)p.pos)[vi0];
    float4 p1 = ((float4*)p.pos)[vi1];
    float4 p2 = ((float4*)p.pos)[vi2];

    // Evaluate edge functions, mirroring the forward kernel.
    float fx = p.xs * (float)px + p.xo;
    float fy = p.ys * (float)py + p.yo;
    float p0x = p0.x - fx * p0.w;
    float p0y = p0.y - fy * p0.w;
    float p1x = p1.x - fx * p1.w;
    float p1y = p1.y - fy * p1.w;
    float p2x = p2.x - fx * p2.w;
    float p2y = p2.y - fy * p2.w;
    float a0 = p1x*p2y - p1y*p2x;
    float a1 = p2x*p0y - p2y*p0x;
    float a2 = p0x*p1y - p0y*p1x;

    // Compute inverse area with epsilon.
    float at = a0 + a1 + a2;
    float ep = copysignf(1e-6f, at); // ~1 pixel in 1k x 1k image.
    float iw = 1.f / (at + ep);

    // Perspective correct, normalized barycentrics.
    float b0 = a0 * iw;
    float b1 = a1 * iw;

    // Position gradients.
    float gb0  = dy.x * iw;
    float gb1  = dy.y * iw;
    float gbb  = gb0 * b0 + gb1 * b1;
    float gp0x = gbb * (p2y - p1y) - gb1 * p2y;
    float gp1x = gbb * (p0y - p2y) + gb0 * p2y;
    float gp2x = gbb * (p1y - p0y) - gb0 * p1y + gb1 * p0y;
    float gp0y = gbb * (p1x - p2x) + gb1 * p2x;
    float gp1y = gbb * (p2x - p0x) - gb0 * p2x;
    float gp2y = gbb * (p0x - p1x) + gb0 * p1x - gb1 * p0x;
    float gp0w = -fx * gp0x - fy * gp0y;
    float gp1w = -fx * gp1x - fy * gp1y;
    float gp2w = -fx * gp2x - fy * gp2y;

    // Bary differential gradients. Skipped when all ddb components are ±0.
    if (ENABLE_DB && ((grad_all_ddb) << 1) != 0)
    {
        float dfxdX = p.xs * iw;
        float dfydY = p.ys * iw;
        ddb.x *= dfxdX;
        ddb.y *= dfydY;
        ddb.z *= dfxdX;
        ddb.w *= dfydY;

        float da0dX = p1.y * p2.w - p2.y * p1.w;
        float da1dX = p2.y * p0.w - p0.y * p2.w;
        float da2dX = p0.y * p1.w - p1.y * p0.w;
        float da0dY = p2.x * p1.w - p1.x * p2.w;
        float da1dY = p0.x * p2.w - p2.x * p0.w;
        float da2dY = p1.x * p0.w - p0.x * p1.w;
        float datdX = da0dX + da1dX + da2dX;
        float datdY = da0dY + da1dY + da2dY;

        float x01 = p0.x - p1.x;
        float x12 = p1.x - p2.x;
        float x20 = p2.x - p0.x;
        float y01 = p0.y - p1.y;
        float y12 = p1.y - p2.y;
        float y20 = p2.y - p0.y;
        float w01 = p0.w - p1.w;
        float w12 = p1.w - p2.w;
        float w20 = p2.w - p0.w;

        float a0p1 = fy * p2.x - fx * p2.y;
        float a0p2 = fx * p1.y - fy * p1.x;
        float a1p0 = fx * p2.y - fy * p2.x;
        float a1p2 = fy * p0.x - fx * p0.y;

        float wdudX = 2.f * b0 * datdX - da0dX;
        float wdudY = 2.f * b0 * datdY - da0dY;
        float wdvdX = 2.f * b1 * datdX - da1dX;
        float wdvdY = 2.f * b1 * datdY - da1dY;

        float c0  = iw * (ddb.x * wdudX + ddb.y * wdudY + ddb.z * wdvdX + ddb.w * wdvdY);
        float cx  = c0 * fx - ddb.x * b0 - ddb.z * b1;
        float cy  = c0 * fy - ddb.y * b0 - ddb.w * b1;
        float cxy = iw * (ddb.x * datdX + ddb.y * datdY);
        float czw = iw * (ddb.z * datdX + ddb.w * datdY);

        gp0x += c0 * y12 - cy * w12 + czw * p2y + ddb.w * p2.w;
        gp1x += c0 * y20 - cy * w20 - cxy * p2y - ddb.y * p2.w;
        gp2x += c0 * y01 - cy * w01 + cxy * p1y - czw * p0y + ddb.y * p1.w - ddb.w * p0.w;
        gp0y += cx * w12 - c0 * x12 - czw * p2x - ddb.z * p2.w;
        gp1y += cx * w20 - c0 * x20 + cxy * p2x + ddb.x * p2.w;
        gp2y += cx * w01 - c0 * x01 - cxy * p1x + czw * p0x - ddb.x * p1.w + ddb.z * p0.w;
        gp0w += cy * x12 - cx * y12 - czw * a1p0 + ddb.z * p2.y - ddb.w * p2.x;
        gp1w += cy * x20 - cx * y20 - cxy * a0p1 - ddb.x * p2.y + ddb.y * p2.x;
        gp2w += cy * x01 - cx * y01 - cxy * a0p2 - czw * a1p2 + ddb.x * p1.y - ddb.y * p1.x - ddb.z * p0.y + ddb.w * p0.x;
    }

    // Accumulate using coalesced atomics. Only x, y, w receive gradients;
    // z is untouched (depth output has no gradient path here).
    caAtomicAdd3_xyw(p.grad + 4 * vi0, gp0x, gp0y, gp0w);
    caAtomicAdd3_xyw(p.grad + 4 * vi1, gp1x, gp1y, gp1w);
    caAtomicAdd3_xyw(p.grad + 4 * vi2, gp2x, gp2y, gp2w);
}

// Template specializations.
__global__ void RasterizeGradKernel  (const RasterizeGradParams p) { RasterizeGradKernelTemplate<false>(p); }
__global__ void RasterizeGradKernelDb(const RasterizeGradParams p) { RasterizeGradKernelTemplate<true>(p); }
21 | 22 | struct RasterizeCudaFwdShaderParams 23 | { 24 | const float* pos; // Vertex positions. 25 | const int* tri; // Triangle indices. 26 | const int* in_idx; // Triangle idx buffer from rasterizer. 27 | float* out; // Main output buffer. 28 | float* out_db; // Bary pixel gradient output buffer. 29 | int numTriangles; // Number of triangles. 30 | int numVertices; // Number of vertices. 31 | int width_in; // Input image width. 32 | int height_in; // Input image height. 33 | int width_out; // Output image width. 34 | int height_out; // Output image height. 35 | int depth; // Size of minibatch. 36 | int instance_mode; // 1 if in instance rendering mode. 37 | float xs, xo, ys, yo; // Pixel position to clip-space x, y transform. 38 | }; 39 | 40 | //------------------------------------------------------------------------ 41 | // Gradient CUDA kernel params. 42 | 43 | struct RasterizeGradParams 44 | { 45 | const float* pos; // Incoming position buffer. 46 | const int* tri; // Incoming triangle buffer. 47 | const float* out; // Rasterizer output buffer. 48 | const float* dy; // Incoming gradients of rasterizer output buffer. 49 | const float* ddb; // Incoming gradients of bary diff output buffer. 50 | float* grad; // Outgoing position gradients. 51 | int numTriangles; // Number of triangles. 52 | int numVertices; // Number of vertices. 53 | int width; // Image width. 54 | int height; // Image height. 55 | int depth; // Size of minibatch. 56 | int instance_mode; // 1 if in instance rendering mode. 57 | float xs, xo, ys, yo; // Pixel position to clip-space x, y transform. 58 | }; 59 | 60 | //------------------------------------------------------------------------ 61 | -------------------------------------------------------------------------------- /nvdiffrast/common/rasterize_gl.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #pragma once 10 | 11 | //------------------------------------------------------------------------ 12 | // Do not try to include OpenGL stuff when compiling CUDA kernels for torch. 13 | 14 | #if !(defined(NVDR_TORCH) && defined(__CUDACC__)) 15 | #include "framework.h" 16 | #include "glutil.h" 17 | 18 | //------------------------------------------------------------------------ 19 | // OpenGL-related persistent state for forward op. 20 | 21 | struct RasterizeGLState // Must be initializable by memset to zero. 22 | { 23 | int width; // Allocated frame buffer width. 24 | int height; // Allocated frame buffer height. 25 | int depth; // Allocated frame buffer depth. 26 | int posCount; // Allocated position buffer in floats. 27 | int triCount; // Allocated triangle buffer in ints. 28 | GLContext glctx; 29 | GLuint glFBO; 30 | GLuint glColorBuffer[2]; 31 | GLuint glPrevOutBuffer; 32 | GLuint glDepthStencilBuffer; 33 | GLuint glVAO; 34 | GLuint glTriBuffer; 35 | GLuint glPosBuffer; 36 | GLuint glProgram; 37 | GLuint glProgramDP; 38 | GLuint glVertexShader; 39 | GLuint glGeometryShader; 40 | GLuint glFragmentShader; 41 | GLuint glFragmentShaderDP; 42 | cudaGraphicsResource_t cudaColorBuffer[2]; 43 | cudaGraphicsResource_t cudaPrevOutBuffer; 44 | cudaGraphicsResource_t cudaPosBuffer; 45 | cudaGraphicsResource_t cudaTriBuffer; 46 | int enableDB; 47 | int enableZModify; // Modify depth in shader, workaround for a rasterization issue on A100. 48 | }; 49 | 50 | //------------------------------------------------------------------------ 51 | // Shared C++ code prototypes. 
52 | 53 | void rasterizeInitGLContext(NVDR_CTX_ARGS, RasterizeGLState& s, int cudaDeviceIdx); 54 | void rasterizeResizeBuffers(NVDR_CTX_ARGS, RasterizeGLState& s, bool& changes, int posCount, int triCount, int width, int height, int depth); 55 | void rasterizeRender(NVDR_CTX_ARGS, RasterizeGLState& s, cudaStream_t stream, const float* posPtr, int posCount, int vtxPerInstance, const int32_t* triPtr, int triCount, const int32_t* rangesPtr, int width, int height, int depth, int peeling_idx); 56 | void rasterizeCopyResults(NVDR_CTX_ARGS, RasterizeGLState& s, cudaStream_t stream, float** outputPtr, int width, int height, int depth); 57 | void rasterizeReleaseBuffers(NVDR_CTX_ARGS, RasterizeGLState& s); 58 | 59 | //------------------------------------------------------------------------ 60 | #endif // !(defined(NVDR_TORCH) && defined(__CUDACC__)) 61 | -------------------------------------------------------------------------------- /nvdiffrast/common/texture.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #include "framework.h" 10 | #include "texture.h" 11 | 12 | //------------------------------------------------------------------------ 13 | // Mip stack construction and access helpers. 14 | 15 | void raiseMipSizeError(NVDR_CTX_ARGS, const TextureKernelParams& p) 16 | { 17 | char buf[1024]; 18 | int bufsz = 1024; 19 | 20 | std::string msg = "Mip-map size error - cannot downsample an odd extent greater than 1. 
Resize the texture so that both spatial extents are powers of two, or limit the number of mip maps using max_mip_level argument.\n"; 21 | 22 | int w = p.texWidth; 23 | int h = p.texHeight; 24 | bool ew = false; 25 | bool eh = false; 26 | 27 | msg += "Attempted mip stack construction:\n"; 28 | msg += "level width height\n"; 29 | msg += "----- ----- ------\n"; 30 | snprintf(buf, bufsz, "base %5d %5d\n", w, h); 31 | msg += buf; 32 | 33 | int mipTotal = 0; 34 | int level = 0; 35 | while ((w|h) > 1 && !(ew || eh)) // Stop at first impossible size. 36 | { 37 | // Current level. 38 | level += 1; 39 | 40 | // Determine if downsampling fails. 41 | ew = ew || (w > 1 && (w & 1)); 42 | eh = eh || (h > 1 && (h & 1)); 43 | 44 | // Downsample. 45 | if (w > 1) w >>= 1; 46 | if (h > 1) h >>= 1; 47 | 48 | // Append level size to error message. 49 | snprintf(buf, bufsz, "mip %-2d ", level); 50 | msg += buf; 51 | if (ew) snprintf(buf, bufsz, " err "); 52 | else snprintf(buf, bufsz, "%5d ", w); 53 | msg += buf; 54 | if (eh) snprintf(buf, bufsz, " err\n"); 55 | else snprintf(buf, bufsz, "%5d\n", h); 56 | msg += buf; 57 | } 58 | 59 | NVDR_CHECK(0, msg); 60 | } 61 | 62 | int calculateMipInfo(NVDR_CTX_ARGS, TextureKernelParams& p, int* mipOffsets) 63 | { 64 | // No levels at all? 65 | if (p.mipLevelLimit == 0) 66 | { 67 | p.mipLevelMax = 0; 68 | return 0; 69 | } 70 | 71 | // Current level size. 72 | int w = p.texWidth; 73 | int h = p.texHeight; 74 | 75 | int mipTotal = 0; 76 | int level = 0; 77 | int c = (p.boundaryMode == TEX_BOUNDARY_MODE_CUBE) ? (p.channels * 6) : p.channels; 78 | mipOffsets[0] = 0; 79 | while ((w|h) > 1) 80 | { 81 | // Current level. 82 | level += 1; 83 | 84 | // Quit if cannot downsample. 85 | if ((w > 1 && (w & 1)) || (h > 1 && (h & 1))) 86 | raiseMipSizeError(NVDR_CTX_PARAMS, p); 87 | 88 | // Downsample. 89 | if (w > 1) w >>= 1; 90 | if (h > 1) h >>= 1; 91 | 92 | mipOffsets[level] = mipTotal; // Store the mip offset (#floats). 
93 | mipTotal += w * h * p.texDepth * c; 94 | 95 | // Hit the level limit? 96 | if (p.mipLevelLimit >= 0 && level == p.mipLevelLimit) 97 | break; 98 | } 99 | 100 | p.mipLevelMax = level; 101 | return mipTotal; 102 | } 103 | 104 | //------------------------------------------------------------------------ 105 | -------------------------------------------------------------------------------- /nvdiffrast/common/texture.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #pragma once 10 | #include "framework.h" 11 | 12 | //------------------------------------------------------------------------ 13 | // Constants. 14 | 15 | #define TEX_DEBUG_MIP_RETAIN_VARIANCE 0 // For debugging 16 | #define TEX_FWD_MAX_KERNEL_BLOCK_WIDTH 8 17 | #define TEX_FWD_MAX_KERNEL_BLOCK_HEIGHT 8 18 | #define TEX_FWD_MAX_MIP_KERNEL_BLOCK_WIDTH 8 19 | #define TEX_FWD_MAX_MIP_KERNEL_BLOCK_HEIGHT 8 20 | #define TEX_GRAD_MAX_KERNEL_BLOCK_WIDTH 8 21 | #define TEX_GRAD_MAX_KERNEL_BLOCK_HEIGHT 8 22 | #define TEX_GRAD_MAX_MIP_KERNEL_BLOCK_WIDTH 8 23 | #define TEX_GRAD_MAX_MIP_KERNEL_BLOCK_HEIGHT 8 24 | #define TEX_MAX_MIP_LEVEL 16 // Currently a texture cannot be larger than 2 GB because we use 32-bit indices everywhere. 25 | #define TEX_MODE_NEAREST 0 // Nearest on base level. 26 | #define TEX_MODE_LINEAR 1 // Bilinear on base level. 27 | #define TEX_MODE_LINEAR_MIPMAP_NEAREST 2 // Bilinear on nearest mip level. 28 | #define TEX_MODE_LINEAR_MIPMAP_LINEAR 3 // Trilinear. 
29 | #define TEX_MODE_COUNT 4 30 | #define TEX_BOUNDARY_MODE_CUBE 0 // Cube map mode. 31 | #define TEX_BOUNDARY_MODE_WRAP 1 // Wrap (u, v). 32 | #define TEX_BOUNDARY_MODE_CLAMP 2 // Clamp (u, v). 33 | #define TEX_BOUNDARY_MODE_ZERO 3 // Pad with zeros. 34 | #define TEX_BOUNDARY_MODE_COUNT 4 35 | 36 | //------------------------------------------------------------------------ 37 | // CUDA kernel params. 38 | 39 | struct TextureKernelParams 40 | { 41 | const float* tex[TEX_MAX_MIP_LEVEL]; // Incoming texture buffer with mip levels. 42 | const float* uv; // Incoming texcoord buffer. 43 | const float* uvDA; // Incoming uv pixel diffs or NULL. 44 | const float* mipLevelBias; // Incoming mip level bias or NULL. 45 | const float* dy; // Incoming output gradient. 46 | float* out; // Outgoing texture data. 47 | float* gradTex[TEX_MAX_MIP_LEVEL]; // Outgoing texture gradients with mip levels. 48 | float* gradUV; // Outgoing texcoord gradient. 49 | float* gradUVDA; // Outgoing texcoord pixel differential gradient. 50 | float* gradMipLevelBias; // Outgoing mip level bias gradient. 51 | int enableMip; // If true, we have uv_da and/or mip_level_bias input(s), and a mip tensor. 52 | int filterMode; // One of the TEX_MODE_ constants. 53 | int boundaryMode; // One of the TEX_BOUNDARY_MODE_ contants. 54 | int texConst; // If true, texture is known to be constant. 55 | int mipLevelLimit; // Mip level limit coming from the op. 56 | int channels; // Number of texture channels. 57 | int imgWidth; // Image width. 58 | int imgHeight; // Image height. 59 | int texWidth; // Texture width. 60 | int texHeight; // Texture height. 61 | int texDepth; // Texture depth. 62 | int n; // Minibatch size. 63 | int mipLevelMax; // Maximum mip level index. Zero if mips disabled. 64 | int mipLevelOut; // Mip level being calculated in builder kernel. 65 | }; 66 | 67 | //------------------------------------------------------------------------ 68 | // C++ helper function prototypes. 
69 | 70 | void raiseMipSizeError(NVDR_CTX_ARGS, const TextureKernelParams& p); 71 | int calculateMipInfo(NVDR_CTX_ARGS, TextureKernelParams& p, int* mipOffsets); 72 | 73 | //------------------------------------------------------------------------ 74 | // Macros. 75 | 76 | #define mipLevelSize(p, i) make_int2(((p).texWidth >> (i)) > 1 ? ((p).texWidth >> (i)) : 1, ((p).texHeight >> (i)) > 1 ? ((p).texHeight >> (i)) : 1) 77 | 78 | //------------------------------------------------------------------------ 79 | -------------------------------------------------------------------------------- /nvdiffrast/lib/setgpu.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/nvdiffrast/lib/setgpu.lib -------------------------------------------------------------------------------- /nvdiffrast/tensorflow/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | from .ops import rasterize, interpolate, texture, antialias 10 | from .plugin_loader import set_cache_dir 11 | 12 | __all__ = ["rasterize", "interpolate", "texture", "antialias", "set_cache_dir"] 13 | -------------------------------------------------------------------------------- /nvdiffrast/tensorflow/plugin_loader.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import glob 10 | import os 11 | import re 12 | import uuid 13 | import hashlib 14 | import tempfile 15 | import shutil 16 | import tensorflow as tf 17 | from tensorflow.python.client import device_lib # pylint: disable=no-name-in-module 18 | 19 | #---------------------------------------------------------------------------- 20 | # Global options. 21 | 22 | _nvdiffrast_cache_dir = None 23 | 24 | def set_cache_dir(path: str) -> None: 25 | '''Set CUDA kernel compilation temp dir. 26 | 27 | If `set_cache_dir` is not called, the cache directory will default to 28 | one of the below: 29 | 30 | - Value of NVDIFFRAST_CACHE_DIR env var, if set 31 | - $HOME/.cache/nvdiffrast if HOME env var is set 32 | - $USERPROFILE/.cache/nvdiffrast if USERPROFILE is set. 
33 | 34 | Args: 35 | path: Where to save CUDA kernel build temporaries 36 | ''' 37 | global _nvdiffrast_cache_dir 38 | _nvdiffrast_cache_dir = path 39 | 40 | def make_cache_dir_path(*paths: str) -> str: 41 | if _nvdiffrast_cache_dir is not None: 42 | return os.path.join(_nvdiffrast_cache_dir, *paths) 43 | if 'NVDIFFRAST_CACHE_DIR' in os.environ: 44 | return os.path.join(os.environ['NVDIFFRAST_CACHE_DIR'], *paths) 45 | if 'HOME' in os.environ: 46 | return os.path.join(os.environ['HOME'], '.cache', 'nvdiffrast', *paths) 47 | if 'USERPROFILE' in os.environ: 48 | return os.path.join(os.environ['USERPROFILE'], '.cache', 'nvdiffrast', *paths) 49 | return os.path.join(tempfile.gettempdir(), '.cache', 'nvdiffrast', *paths) 50 | 51 | cuda_cache_version_tag = 'v1' 52 | do_not_hash_included_headers = False # Speed up compilation by assuming that headers included by the CUDA code never change. Unsafe! 53 | verbose = True # Print status messages to stdout. 54 | 55 | #---------------------------------------------------------------------------- 56 | # Internal helper funcs. 
57 | 58 | def _find_compiler_bindir(): 59 | hostx64_paths = sorted(glob.glob('C:/Program Files/Microsoft Visual Studio/*/Enterprise/VC/Tools/MSVC/*/bin/Hostx64/x64'), reverse=True) 60 | if hostx64_paths != []: 61 | return hostx64_paths[0] 62 | hostx64_paths = sorted(glob.glob('C:/Program Files (x86)/Microsoft Visual Studio/*/Enterprise/VC/Tools/MSVC/*/bin/Hostx64/x64'), reverse=True) 63 | if hostx64_paths != []: 64 | return hostx64_paths[0] 65 | hostx64_paths = sorted(glob.glob('C:/Program Files/Microsoft Visual Studio/*/Professional/VC/Tools/MSVC/*/bin/Hostx64/x64'), reverse=True) 66 | if hostx64_paths != []: 67 | return hostx64_paths[0] 68 | hostx64_paths = sorted(glob.glob('C:/Program Files (x86)/Microsoft Visual Studio/*/Professional/VC/Tools/MSVC/*/bin/Hostx64/x64'), reverse=True) 69 | if hostx64_paths != []: 70 | return hostx64_paths[0] 71 | hostx64_paths = sorted(glob.glob('C:/Program Files/Microsoft Visual Studio/*/BuildTools/VC/Tools/MSVC/*/bin/Hostx64/x64'), reverse=True) 72 | if hostx64_paths != []: 73 | return hostx64_paths[0] 74 | hostx64_paths = sorted(glob.glob('C:/Program Files (x86)/Microsoft Visual Studio/*/BuildTools/VC/Tools/MSVC/*/bin/Hostx64/x64'), reverse=True) 75 | if hostx64_paths != []: 76 | return hostx64_paths[0] 77 | hostx64_paths = sorted(glob.glob('C:/Program Files/Microsoft Visual Studio/*/Community/VC/Tools/MSVC/*/bin/Hostx64/x64'), reverse=True) 78 | if hostx64_paths != []: 79 | return hostx64_paths[0] 80 | hostx64_paths = sorted(glob.glob('C:/Program Files (x86)/Microsoft Visual Studio/*/Community/VC/Tools/MSVC/*/bin/Hostx64/x64'), reverse=True) 81 | if hostx64_paths != []: 82 | return hostx64_paths[0] 83 | vc_bin_dir = 'C:/Program Files (x86)/Microsoft Visual Studio 14.0/vc/bin' 84 | if os.path.isdir(vc_bin_dir): 85 | return vc_bin_dir 86 | return None 87 | 88 | def _get_compute_cap(device): 89 | caps_str = device.physical_device_desc 90 | m = re.search('compute capability: (\\d+).(\\d+)', caps_str) 91 | major = m.group(1) 92 | 
minor = m.group(2) 93 | return (major, minor) 94 | 95 | def _get_cuda_gpu_arch_string(): 96 | gpus = [x for x in device_lib.list_local_devices() if x.device_type == 'GPU'] 97 | if len(gpus) == 0: 98 | raise RuntimeError('No GPU devices found') 99 | (major, minor) = _get_compute_cap(gpus[0]) 100 | return 'sm_%s%s' % (major, minor) 101 | 102 | def _run_cmd(cmd): 103 | with os.popen(cmd) as pipe: 104 | output = pipe.read() 105 | status = pipe.close() 106 | if status is not None: 107 | raise RuntimeError('NVCC returned an error. See below for full command line and output log:\n\n%s\n\n%s' % (cmd, output)) 108 | 109 | def _prepare_nvcc_cli(opts): 110 | cmd = 'nvcc ' + opts.strip() 111 | cmd += ' --disable-warnings' 112 | cmd += ' --include-path "%s"' % tf.sysconfig.get_include() 113 | cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'protobuf_archive', 'src') 114 | cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'com_google_absl') 115 | cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'eigen_archive') 116 | 117 | compiler_bindir = _find_compiler_bindir() 118 | if compiler_bindir is None: 119 | # Require that _find_compiler_bindir succeeds on Windows. Allow 120 | # nvcc to use whatever is the default on Linux. 121 | if os.name == 'nt': 122 | raise RuntimeError('Could not find MSVC/GCC/CLANG installation on this computer. Check compiler_bindir_search_path list in "%s".' % __file__) 123 | else: 124 | cmd += ' --compiler-bindir "%s"' % compiler_bindir 125 | cmd += ' 2>&1' 126 | return cmd 127 | 128 | #---------------------------------------------------------------------------- 129 | # Main entry point. 130 | 131 | _plugin_cache = dict() 132 | 133 | def get_plugin(cuda_file, extra_nvcc_options=[]): 134 | cuda_file_base = os.path.basename(cuda_file) 135 | cuda_file_name, cuda_file_ext = os.path.splitext(cuda_file_base) 136 | 137 | # Already in cache? 
138 | if cuda_file in _plugin_cache: 139 | return _plugin_cache[cuda_file] 140 | 141 | # Setup plugin. 142 | if verbose: 143 | print('Setting up TensorFlow plugin "%s": ' % cuda_file_base, end='', flush=True) 144 | try: 145 | # Hash CUDA source. 146 | md5 = hashlib.md5() 147 | with open(cuda_file, 'rb') as f: 148 | md5.update(f.read()) 149 | md5.update(b'\n') 150 | 151 | # Hash headers included by the CUDA code by running it through the preprocessor. 152 | if not do_not_hash_included_headers: 153 | if verbose: 154 | print('Preprocessing... ', end='', flush=True) 155 | with tempfile.TemporaryDirectory() as tmp_dir: 156 | tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + cuda_file_ext) 157 | _run_cmd(_prepare_nvcc_cli('"%s" --preprocess -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir))) 158 | with open(tmp_file, 'rb') as f: 159 | bad_file_str = ('"' + cuda_file.replace('\\', '/') + '"').encode('utf-8') # __FILE__ in error check macros 160 | good_file_str = ('"' + cuda_file_base + '"').encode('utf-8') 161 | for ln in f: 162 | if not ln.startswith(b'# ') and not ln.startswith(b'#line '): # ignore line number pragmas 163 | ln = ln.replace(bad_file_str, good_file_str) 164 | md5.update(ln) 165 | md5.update(b'\n') 166 | 167 | # Select compiler options. 168 | compile_opts = '' 169 | if os.name == 'nt': 170 | compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.lib') 171 | compile_opts += ' --library-path="%s"' % (os.path.dirname(__file__) + r"\..\lib") # Find libraries during compilation. 172 | elif os.name == 'posix': 173 | compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.so') 174 | compile_opts += ' --compiler-options \'-fPIC -D_GLIBCXX_USE_CXX11_ABI=0\'' 175 | else: 176 | assert False # not Windows or Linux, w00t? 
177 | compile_opts += ' --gpu-architecture=%s' % _get_cuda_gpu_arch_string() 178 | compile_opts += ' --use_fast_math' 179 | for opt in extra_nvcc_options: 180 | compile_opts += ' ' + opt 181 | nvcc_cmd = _prepare_nvcc_cli(compile_opts) 182 | 183 | # Hash build configuration. 184 | md5.update(('nvcc_cmd: ' + nvcc_cmd).encode('utf-8') + b'\n') 185 | md5.update(('tf.VERSION: ' + tf.VERSION).encode('utf-8') + b'\n') 186 | md5.update(('cuda_cache_version_tag: ' + cuda_cache_version_tag).encode('utf-8') + b'\n') 187 | 188 | # Compile if not already compiled. 189 | bin_file_ext = '.dll' if os.name == 'nt' else '.so' 190 | cuda_cache_path = make_cache_dir_path() 191 | bin_file = os.path.join(make_cache_dir_path(), cuda_file_name + '_' + md5.hexdigest() + bin_file_ext) 192 | if not os.path.isfile(bin_file): 193 | if verbose: 194 | print('Compiling... ', end='', flush=True) 195 | with tempfile.TemporaryDirectory() as tmp_dir: 196 | tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + bin_file_ext) 197 | _run_cmd(nvcc_cmd + ' "%s" --shared -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir)) 198 | os.makedirs(cuda_cache_path, exist_ok=True) 199 | intermediate_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + uuid.uuid4().hex + '_tmp' + bin_file_ext) 200 | shutil.copyfile(tmp_file, intermediate_file) 201 | os.rename(intermediate_file, bin_file) # atomic 202 | 203 | # Load. 204 | if verbose: 205 | print('Loading... ', end='', flush=True) 206 | plugin = tf.load_op_library(bin_file) 207 | 208 | # Add to cache. 
209 | _plugin_cache[cuda_file] = plugin 210 | if verbose: 211 | print('Done.', flush=True) 212 | return plugin 213 | 214 | except: 215 | if verbose: 216 | print('Failed!', flush=True) 217 | raise 218 | 219 | #---------------------------------------------------------------------------- 220 | -------------------------------------------------------------------------------- /nvdiffrast/tensorflow/tf_all.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | // TF-specific helpers. 10 | 11 | #define OP_CHECK_CUDA_ERROR(CTX, CUDA_CALL) do { cudaError_t err = CUDA_CALL; OP_REQUIRES(CTX, err == cudaSuccess, errors::Internal("Cuda error: ", cudaGetErrorName(err), "[", #CUDA_CALL, ";]")); } while (0) 12 | #define OP_CHECK_GL_ERROR(CTX, GL_CALL) do { GL_CALL; GLenum err = glGetError(); OP_REQUIRES(CTX, err == GL_NO_ERROR, errors::Internal("OpenGL error: ", getGLErrorString(err), "[", #GL_CALL, ";]")); } while (0) 13 | 14 | // Cuda kernels and CPP all together. What an absolute compilation unit. 
15 | 16 | #define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ 17 | #include "../common/framework.h" 18 | #include "../common/glutil.cpp" 19 | 20 | #include "../common/common.h" 21 | #include "../common/common.cpp" 22 | 23 | #include "../common/rasterize.h" 24 | #include "../common/rasterize_gl.cpp" 25 | #include "../common/rasterize.cu" 26 | #include "tf_rasterize.cu" 27 | 28 | #include "../common/interpolate.cu" 29 | #include "tf_interpolate.cu" 30 | 31 | #include "../common/texture.cpp" 32 | #include "../common/texture.cu" 33 | #include "tf_texture.cu" 34 | 35 | #include "../common/antialias.cu" 36 | #include "tf_antialias.cu" 37 | -------------------------------------------------------------------------------- /nvdiffrast/torch/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | from .ops import RasterizeCudaContext, RasterizeGLContext, get_log_level, set_log_level, rasterize, DepthPeeler, interpolate, texture, texture_construct_mip, antialias, antialias_construct_topology_hash 10 | __all__ = ["RasterizeCudaContext", "RasterizeGLContext", "get_log_level", "set_log_level", "rasterize", "DepthPeeler", "interpolate", "texture", "texture_construct_mip", "antialias", "antialias_construct_topology_hash"] 11 | -------------------------------------------------------------------------------- /nvdiffrast/torch/torch_bindings.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #include "torch_common.inl" 10 | #include "torch_types.h" 11 | #include 12 | 13 | //------------------------------------------------------------------------ 14 | // Op prototypes. Return type macros for readability. 15 | 16 | #define OP_RETURN_T torch::Tensor 17 | #define OP_RETURN_TT std::tuple 18 | #define OP_RETURN_TTT std::tuple 19 | #define OP_RETURN_TTTT std::tuple 20 | #define OP_RETURN_TTV std::tuple > 21 | #define OP_RETURN_TTTTV std::tuple > 22 | 23 | OP_RETURN_TT rasterize_fwd_cuda (RasterizeCRStateWrapper& stateWrapper, torch::Tensor pos, torch::Tensor tri, std::tuple resolution, torch::Tensor ranges, int peeling_idx); 24 | OP_RETURN_T rasterize_grad (torch::Tensor pos, torch::Tensor tri, torch::Tensor out, torch::Tensor dy); 25 | OP_RETURN_T rasterize_grad_db (torch::Tensor pos, torch::Tensor tri, torch::Tensor out, torch::Tensor dy, torch::Tensor ddb); 26 | OP_RETURN_TT interpolate_fwd (torch::Tensor attr, torch::Tensor rast, torch::Tensor tri); 27 | OP_RETURN_TT interpolate_fwd_da (torch::Tensor attr, torch::Tensor rast, torch::Tensor tri, torch::Tensor rast_db, bool diff_attrs_all, std::vector& diff_attrs_vec); 28 | OP_RETURN_TT interpolate_grad (torch::Tensor attr, torch::Tensor rast, torch::Tensor tri, torch::Tensor dy); 29 | OP_RETURN_TTT interpolate_grad_da (torch::Tensor attr, torch::Tensor rast, torch::Tensor tri, torch::Tensor dy, torch::Tensor rast_db, torch::Tensor dda, bool diff_attrs_all, std::vector& diff_attrs_vec); 30 | TextureMipWrapper texture_construct_mip (torch::Tensor tex, int max_mip_level, bool cube_mode); 31 | OP_RETURN_T texture_fwd 
(torch::Tensor tex, torch::Tensor uv, int filter_mode, int boundary_mode); 32 | OP_RETURN_T texture_fwd_mip (torch::Tensor tex, torch::Tensor uv, torch::Tensor uv_da, torch::Tensor mip_level_bias, TextureMipWrapper mip_wrapper, std::vector mip_stack, int filter_mode, int boundary_mode); 33 | OP_RETURN_T texture_grad_nearest (torch::Tensor tex, torch::Tensor uv, torch::Tensor dy, int filter_mode, int boundary_mode); 34 | OP_RETURN_TT texture_grad_linear (torch::Tensor tex, torch::Tensor uv, torch::Tensor dy, int filter_mode, int boundary_mode); 35 | OP_RETURN_TTV texture_grad_linear_mipmap_nearest (torch::Tensor tex, torch::Tensor uv, torch::Tensor dy, torch::Tensor uv_da, torch::Tensor mip_level_bias, TextureMipWrapper mip_wrapper, std::vector mip_stack, int filter_mode, int boundary_mode); 36 | OP_RETURN_TTTTV texture_grad_linear_mipmap_linear (torch::Tensor tex, torch::Tensor uv, torch::Tensor dy, torch::Tensor uv_da, torch::Tensor mip_level_bias, TextureMipWrapper mip_wrapper, std::vector mip_stack, int filter_mode, int boundary_mode); 37 | TopologyHashWrapper antialias_construct_topology_hash (torch::Tensor tri); 38 | OP_RETURN_TT antialias_fwd (torch::Tensor color, torch::Tensor rast, torch::Tensor pos, torch::Tensor tri, TopologyHashWrapper topology_hash); 39 | OP_RETURN_TT antialias_grad (torch::Tensor color, torch::Tensor rast, torch::Tensor pos, torch::Tensor tri, torch::Tensor dy, torch::Tensor work_buffer); 40 | 41 | //------------------------------------------------------------------------ 42 | 43 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 44 | // State classes. 45 | pybind11::class_(m, "RasterizeCRStateWrapper").def(pybind11::init()); 46 | pybind11::class_(m, "TextureMipWrapper").def(pybind11::init<>()); 47 | pybind11::class_(m, "TopologyHashWrapper"); 48 | 49 | // Plumbing to torch/c10 logging system. 
50 | m.def("get_log_level", [](void) { return FLAGS_caffe2_log_level; }, "get log level"); 51 | m.def("set_log_level", [](int level){ FLAGS_caffe2_log_level = level; }, "set log level"); 52 | 53 | // Ops. 54 | m.def("rasterize_fwd_cuda", &rasterize_fwd_cuda, "rasterize forward op (cuda)"); 55 | m.def("rasterize_grad", &rasterize_grad, "rasterize gradient op ignoring db gradients"); 56 | m.def("rasterize_grad_db", &rasterize_grad_db, "rasterize gradient op with db gradients"); 57 | m.def("interpolate_fwd", &interpolate_fwd, "interpolate forward op with attribute derivatives"); 58 | m.def("interpolate_fwd_da", &interpolate_fwd_da, "interpolate forward op without attribute derivatives"); 59 | m.def("interpolate_grad", &interpolate_grad, "interpolate gradient op with attribute derivatives"); 60 | m.def("interpolate_grad_da", &interpolate_grad_da, "interpolate gradient op without attribute derivatives"); 61 | m.def("texture_construct_mip", &texture_construct_mip, "texture mipmap construction"); 62 | m.def("texture_fwd", &texture_fwd, "texture forward op without mipmapping"); 63 | m.def("texture_fwd_mip", &texture_fwd_mip, "texture forward op with mipmapping"); 64 | m.def("texture_grad_nearest", &texture_grad_nearest, "texture gradient op in nearest mode"); 65 | m.def("texture_grad_linear", &texture_grad_linear, "texture gradient op in linear mode"); 66 | m.def("texture_grad_linear_mipmap_nearest", &texture_grad_linear_mipmap_nearest, "texture gradient op in linear-mipmap-nearest mode"); 67 | m.def("texture_grad_linear_mipmap_linear", &texture_grad_linear_mipmap_linear, "texture gradient op in linear-mipmap-linear mode"); 68 | m.def("antialias_construct_topology_hash", &antialias_construct_topology_hash, "antialias topology hash construction"); 69 | m.def("antialias_fwd", &antialias_fwd, "antialias forward op"); 70 | m.def("antialias_grad", &antialias_grad, "antialias gradient op"); 71 | } 72 | 73 | 
//------------------------------------------------------------------------ 74 | -------------------------------------------------------------------------------- /nvdiffrast/torch/torch_bindings_gl.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #include "torch_common.inl" 10 | #include "torch_types.h" 11 | #include 12 | 13 | //------------------------------------------------------------------------ 14 | // Op prototypes. 15 | 16 | std::tuple rasterize_fwd_gl(RasterizeGLStateWrapper& stateWrapper, torch::Tensor pos, torch::Tensor tri, std::tuple resolution, torch::Tensor ranges, int peeling_idx); 17 | 18 | //------------------------------------------------------------------------ 19 | 20 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 21 | // State classes. 22 | pybind11::class_(m, "RasterizeGLStateWrapper").def(pybind11::init()) 23 | .def("set_context", &RasterizeGLStateWrapper::setContext) 24 | .def("release_context", &RasterizeGLStateWrapper::releaseContext); 25 | 26 | // Ops. 27 | m.def("rasterize_fwd_gl", &rasterize_fwd_gl, "rasterize forward op (opengl)"); 28 | } 29 | 30 | //------------------------------------------------------------------------ 31 | -------------------------------------------------------------------------------- /nvdiffrast/torch/torch_common.inl: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #pragma once 10 | #include "../common/framework.h" 11 | 12 | //------------------------------------------------------------------------ 13 | // Input check helpers. 14 | //------------------------------------------------------------------------ 15 | 16 | #ifdef _MSC_VER 17 | #define __func__ __FUNCTION__ 18 | #endif 19 | 20 | #define NVDR_CHECK_DEVICE(...) do { TORCH_CHECK(at::cuda::check_device({__VA_ARGS__}), __func__, "(): Inputs " #__VA_ARGS__ " must reside on the same GPU device") } while(0) 21 | #define NVDR_CHECK_CPU(...) do { nvdr_check_cpu({__VA_ARGS__}, __func__, "(): Inputs " #__VA_ARGS__ " must reside on CPU"); } while(0) 22 | #define NVDR_CHECK_CONTIGUOUS(...) do { nvdr_check_contiguous({__VA_ARGS__}, __func__, "(): Inputs " #__VA_ARGS__ " must be contiguous tensors"); } while(0) 23 | #define NVDR_CHECK_F32(...) do { nvdr_check_f32({__VA_ARGS__}, __func__, "(): Inputs " #__VA_ARGS__ " must be float32 tensors"); } while(0) 24 | #define NVDR_CHECK_I32(...) 
do { nvdr_check_i32({__VA_ARGS__}, __func__, "(): Inputs " #__VA_ARGS__ " must be int32 tensors"); } while(0) 25 | inline void nvdr_check_cpu(at::ArrayRef ts, const char* func, const char* err_msg) { for (const at::Tensor& t : ts) TORCH_CHECK(t.device().type() == c10::DeviceType::CPU, func, err_msg); } 26 | inline void nvdr_check_contiguous(at::ArrayRef ts, const char* func, const char* err_msg) { for (const at::Tensor& t : ts) TORCH_CHECK(t.is_contiguous(), func, err_msg); } 27 | inline void nvdr_check_f32(at::ArrayRef ts, const char* func, const char* err_msg) { for (const at::Tensor& t : ts) TORCH_CHECK(t.dtype() == torch::kFloat32, func, err_msg); } 28 | inline void nvdr_check_i32(at::ArrayRef ts, const char* func, const char* err_msg) { for (const at::Tensor& t : ts) TORCH_CHECK(t.dtype() == torch::kInt32, func, err_msg); } 29 | //------------------------------------------------------------------------ 30 | -------------------------------------------------------------------------------- /nvdiffrast/torch/torch_rasterize_gl.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #include "torch_common.inl" 10 | #include "torch_types.h" 11 | #include "../common/common.h" 12 | #include "../common/rasterize_gl.h" 13 | #include 14 | 15 | //------------------------------------------------------------------------ 16 | // Python GL state wrapper methods. 
17 | 18 | RasterizeGLStateWrapper::RasterizeGLStateWrapper(bool enableDB, bool automatic_, int cudaDeviceIdx_) 19 | { 20 | pState = new RasterizeGLState(); 21 | automatic = automatic_; 22 | cudaDeviceIdx = cudaDeviceIdx_; 23 | memset(pState, 0, sizeof(RasterizeGLState)); 24 | pState->enableDB = enableDB ? 1 : 0; 25 | rasterizeInitGLContext(NVDR_CTX_PARAMS, *pState, cudaDeviceIdx_); 26 | releaseGLContext(); 27 | } 28 | 29 | RasterizeGLStateWrapper::~RasterizeGLStateWrapper(void) 30 | { 31 | setGLContext(pState->glctx); 32 | rasterizeReleaseBuffers(NVDR_CTX_PARAMS, *pState); 33 | releaseGLContext(); 34 | destroyGLContext(pState->glctx); 35 | delete pState; 36 | } 37 | 38 | void RasterizeGLStateWrapper::setContext(void) 39 | { 40 | setGLContext(pState->glctx); 41 | } 42 | 43 | void RasterizeGLStateWrapper::releaseContext(void) 44 | { 45 | releaseGLContext(); 46 | } 47 | 48 | //------------------------------------------------------------------------ 49 | // Forward op (OpenGL). 50 | 51 | std::tuple rasterize_fwd_gl(RasterizeGLStateWrapper& stateWrapper, torch::Tensor pos, torch::Tensor tri, std::tuple resolution, torch::Tensor ranges, int peeling_idx) 52 | { 53 | const at::cuda::OptionalCUDAGuard device_guard(device_of(pos)); 54 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 55 | RasterizeGLState& s = *stateWrapper.pState; 56 | 57 | // Check inputs. 58 | NVDR_CHECK_DEVICE(pos, tri); 59 | NVDR_CHECK_CPU(ranges); 60 | NVDR_CHECK_CONTIGUOUS(pos, tri, ranges); 61 | NVDR_CHECK_F32(pos); 62 | NVDR_CHECK_I32(tri, ranges); 63 | 64 | // Check that GL context was created for the correct GPU. 65 | NVDR_CHECK(pos.get_device() == stateWrapper.cudaDeviceIdx, "GL context must must reside on the same device as input tensors"); 66 | 67 | // Determine number of outputs 68 | int num_outputs = s.enableDB ? 2 : 1; 69 | 70 | // Determine instance mode and check input dimensions. 
71 | bool instance_mode = pos.sizes().size() > 2; 72 | if (instance_mode) 73 | NVDR_CHECK(pos.sizes().size() == 3 && pos.size(0) > 0 && pos.size(1) > 0 && pos.size(2) == 4, "instance mode - pos must have shape [>0, >0, 4]"); 74 | else 75 | { 76 | NVDR_CHECK(pos.sizes().size() == 2 && pos.size(0) > 0 && pos.size(1) == 4, "range mode - pos must have shape [>0, 4]"); 77 | NVDR_CHECK(ranges.sizes().size() == 2 && ranges.size(0) > 0 && ranges.size(1) == 2, "range mode - ranges must have shape [>0, 2]"); 78 | } 79 | NVDR_CHECK(tri.sizes().size() == 2 && tri.size(0) > 0 && tri.size(1) == 3, "tri must have shape [>0, 3]"); 80 | 81 | // Get output shape. 82 | int height = std::get<0>(resolution); 83 | int width = std::get<1>(resolution); 84 | int depth = instance_mode ? pos.size(0) : ranges.size(0); 85 | NVDR_CHECK(height > 0 && width > 0, "resolution must be [>0, >0]"); 86 | 87 | // Get position and triangle buffer sizes in int32/float32. 88 | int posCount = 4 * pos.size(0) * (instance_mode ? pos.size(1) : 1); 89 | int triCount = 3 * tri.size(0); 90 | 91 | // Set the GL context unless manual context. 92 | if (stateWrapper.automatic) 93 | setGLContext(s.glctx); 94 | 95 | // Resize all buffers. 96 | bool changes = false; 97 | rasterizeResizeBuffers(NVDR_CTX_PARAMS, s, changes, posCount, triCount, width, height, depth); 98 | if (changes) 99 | { 100 | #ifdef _WIN32 101 | // Workaround for occasional blank first frame on Windows. 102 | releaseGLContext(); 103 | setGLContext(s.glctx); 104 | #endif 105 | } 106 | 107 | // Copy input data to GL and render. 108 | const float* posPtr = pos.data_ptr(); 109 | const int32_t* rangesPtr = instance_mode ? 0 : ranges.data_ptr(); // This is in CPU memory. 110 | const int32_t* triPtr = tri.data_ptr(); 111 | int vtxPerInstance = instance_mode ? 
pos.size(1) : 0; 112 | rasterizeRender(NVDR_CTX_PARAMS, s, stream, posPtr, posCount, vtxPerInstance, triPtr, triCount, rangesPtr, width, height, depth, peeling_idx); 113 | 114 | // Allocate output tensors. 115 | torch::TensorOptions opts = torch::TensorOptions().dtype(torch::kFloat32).device(torch::kCUDA); 116 | torch::Tensor out = torch::empty({depth, height, width, 4}, opts); 117 | torch::Tensor out_db = torch::empty({depth, height, width, s.enableDB ? 4 : 0}, opts); 118 | float* outputPtr[2]; 119 | outputPtr[0] = out.data_ptr(); 120 | outputPtr[1] = s.enableDB ? out_db.data_ptr() : NULL; 121 | 122 | // Copy rasterized results into CUDA buffers. 123 | rasterizeCopyResults(NVDR_CTX_PARAMS, s, stream, outputPtr, width, height, depth); 124 | 125 | // Done. Release GL context and return. 126 | if (stateWrapper.automatic) 127 | releaseGLContext(); 128 | 129 | return std::tuple(out, out_db); 130 | } 131 | 132 | //------------------------------------------------------------------------ 133 | -------------------------------------------------------------------------------- /nvdiffrast/torch/torch_types.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #include "torch_common.inl" 10 | 11 | //------------------------------------------------------------------------ 12 | // Python GL state wrapper. 
13 | 14 | class RasterizeGLState; 15 | class RasterizeGLStateWrapper 16 | { 17 | public: 18 | RasterizeGLStateWrapper (bool enableDB, bool automatic, int cudaDeviceIdx); 19 | ~RasterizeGLStateWrapper (void); 20 | 21 | void setContext (void); 22 | void releaseContext (void); 23 | 24 | RasterizeGLState* pState; 25 | bool automatic; 26 | int cudaDeviceIdx; 27 | }; 28 | 29 | //------------------------------------------------------------------------ 30 | // Python CudaRaster state wrapper. 31 | 32 | namespace CR { class CudaRaster; } 33 | class RasterizeCRStateWrapper 34 | { 35 | public: 36 | RasterizeCRStateWrapper (int cudaDeviceIdx); 37 | ~RasterizeCRStateWrapper (void); 38 | 39 | CR::CudaRaster* cr; 40 | int cudaDeviceIdx; 41 | }; 42 | 43 | //------------------------------------------------------------------------ 44 | // Mipmap wrapper to prevent intrusion from Python side. 45 | 46 | class TextureMipWrapper 47 | { 48 | public: 49 | torch::Tensor mip; 50 | int max_mip_level; 51 | std::vector texture_size; // For error checking. 52 | bool cube_mode; // For error checking. 53 | }; 54 | 55 | 56 | //------------------------------------------------------------------------ 57 | // Antialias topology hash wrapper to prevent intrusion from Python side. 58 | 59 | class TopologyHashWrapper 60 | { 61 | public: 62 | torch::Tensor ev_hash; 63 | }; 64 | 65 | //------------------------------------------------------------------------ 66 | -------------------------------------------------------------------------------- /run_sample.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # NVIDIA CORPORATION and its licensors retain all intellectual property 6 | # and proprietary rights in and to this software, related documentation 7 | # and any modifications thereto. 
# Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.

# Print usage help.
# NOTE(review): the sample-path placeholder at the end of the first usage line
# appears to have been stripped (likely an HTML-escaping artifact) -- verify
# against the upstream script.
function print_help {
    echo "Usage: `basename $0` [--build-container] "
    echo ""
    echo "Option --build-container will build the Docker container based on"
    echo "docker/Dockerfile and tag the image with gltorch:latest."
    echo ""
    echo "Example: `basename $0` samples/torch/envphong.py"
}

# Parse arguments. The first non-option argument is taken as the sample
# script; everything after it is passed through to the sample unchanged.
build_container=0
sample=""
while [[ "$#" -gt 0 ]]; do
    case $1 in
        --build-container) build_container=1;;
        -h|--help) print_help; exit 0 ;;
        --*) echo "Unknown parameter passed: $1"; exit 1 ;;
        *) sample="$1"; shift; break;
    esac
    shift
done

# Remaining arguments are forwarded verbatim to the sample script.
rest=$@

# Build the docker container
if [ "$build_container" = "1" ]; then
    docker build --tag gltorch:latest -f docker/Dockerfile .
fi

# A readable sample file is mandatory; bail out otherwise.
if [ ! -f "$sample" ]; then
    echo
    echo "No python sample given or file '$sample' not found. Exiting."
    exit 1
fi

image="gltorch:latest"

echo "Using container image: $image"
echo "Running command: $sample $rest"

# Run a sample with docker. The current directory is mounted at /app and the
# torch extension cache is redirected inside the mount so it persists.
docker run --rm -it --gpus all --user $(id -u):$(id -g) \
    -v `pwd`:/app --workdir /app -e TORCH_EXTENSIONS_DIR=/app/tmp $image python3 $sample $rest
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.

import numpy as np
import os
import sys
import pathlib

import util
import tensorflow as tf

sys.path.insert(0, os.path.join(sys.path[0], '../..')) # for nvdiffrast
import nvdiffrast.tensorflow as dr

#----------------------------------------------------------------------------
# Cube shape fitter.
#----------------------------------------------------------------------------

def fit_cube(max_iter = 5000,
             resolution = 4,
             discontinuous = False,
             repeats = 1,
             log_interval = 10,
             display_interval = None,
             display_res = 512,
             out_dir = '.',
             log_fn = None,
             imgsave_interval = None,
             imgsave_fn = None):
    """Fit cube vertex positions and colors to rendered reference images.

    Builds a TF1 graph that renders a reference cube from known data and a
    candidate cube from learned vertex positions/colors, then minimizes the
    antialiased image-space L2 difference with Adam.

    Args:
      max_iter:         Optimization iterations per repeat.
      resolution:       Rasterization resolution of the training render.
      discontinuous:    Load the 'cube_d' mesh variant instead of 'cube_c'.
      repeats:          Number of independent optimization runs.
      log_interval:     Iterations between log prints; falsy disables.
      display_interval: Iterations between interactive displays; falsy disables.
      display_res:      Resolution of the display/saved result images.
      out_dir:          Output directory, created if missing.
      log_fn:           Log file name inside out_dir, or None.
      imgsave_interval: Iterations between image saves; falsy disables.
      imgsave_fn:       Image file name pattern with one %d slot for iteration.
    """

    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Load mesh data. Order of arrays inside the .npz is assumed to be
    # (pos_idx, vtxp, col_idx, vtxc) -- relies on insertion order of f.values().
    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    fn = 'cube_%s.npz' % ('d' if discontinuous else 'c')
    with np.load(f'{datadir}/{fn}') as f:
        pos_idx, vtxp, col_idx, vtxc = f.values()
    print("Mesh has %d triangles and %d vertices." % (pos_idx.shape[0], vtxp.shape[0]))

    # Transformation matrix input to TF graph.
    mtx_in = tf.placeholder(tf.float32, [4, 4])

    # Setup TF graph for reference.
    vtxw = np.concatenate([vtxp, np.ones([vtxp.shape[0], 1])], axis=1).astype(np.float32)
    pos_clip = tf.matmul(vtxw, mtx_in, transpose_b=True)[tf.newaxis, ...]
    rast_out, _ = dr.rasterize(pos_clip, pos_idx, resolution=[resolution, resolution], output_db=False)
    color, _ = dr.interpolate(vtxc[tf.newaxis, ...], rast_out, col_idx)
    color = dr.antialias(color, rast_out, pos_clip, pos_idx)

    # Optimized variables.
    vtxc_opt = tf.get_variable('vtxc', initializer=tf.zeros_initializer(), shape=vtxc.shape)
    vtxp_opt = tf.get_variable('vtxp', initializer=tf.zeros_initializer(), shape=vtxp.shape)

    # Optimization variable setters for initialization.
    vtxc_opt_in = tf.placeholder(tf.float32, vtxc.shape)
    vtxp_opt_in = tf.placeholder(tf.float32, vtxp.shape)
    opt_set = tf.group(tf.assign(vtxc_opt, vtxc_opt_in), tf.assign(vtxp_opt, vtxp_opt_in))

    # Setup TF graph for the candidate rendering we optimize.
    vtxw_opt = tf.concat([vtxp_opt, tf.ones([vtxp.shape[0], 1], tf.float32)], axis=1)
    pos_clip_opt = tf.matmul(vtxw_opt, mtx_in, transpose_b=True)[tf.newaxis, ...]
    rast_out_opt, _ = dr.rasterize(pos_clip_opt, pos_idx, resolution=[resolution, resolution], output_db=False)
    color_opt, _ = dr.interpolate(vtxc_opt[tf.newaxis, ...], rast_out_opt, col_idx)
    color_opt = dr.antialias(color_opt, rast_out_opt, pos_clip_opt, pos_idx)

    # Image-space loss and optimizer.
    loss = tf.reduce_mean((color_opt - color)**2)
    lr_in = tf.placeholder(tf.float32, [])
    train_op = tf.train.AdamOptimizer(lr_in, 0.9, 0.999).minimize(loss, var_list=[vtxp_opt, vtxc_opt])

    # Setup TF graph for display (both candidate and reference at display_res).
    rast_out_disp, _ = dr.rasterize(pos_clip_opt, pos_idx, resolution=[display_res, display_res], output_db=False)
    color_disp, _ = dr.interpolate(vtxc_opt[tf.newaxis, ...], rast_out_disp, col_idx)
    color_disp = dr.antialias(color_disp, rast_out_disp, pos_clip_opt, pos_idx)
    rast_out_disp_ref, _ = dr.rasterize(pos_clip, pos_idx, resolution=[display_res, display_res], output_db=False)
    color_disp_ref, _ = dr.interpolate(vtxc[tf.newaxis, ...], rast_out_disp_ref, col_idx)
    color_disp_ref = dr.antialias(color_disp_ref, rast_out_disp_ref, pos_clip, pos_idx)

    # Geometric error calculation: mean distance of optimized vertices from
    # the ideal half-unit cube surface (|coord| == 0.5 per axis).
    geom_loss = tf.reduce_mean(tf.reduce_sum((tf.abs(vtxp_opt) - .5)**2, axis=1)**0.5)

    # Open log file.
    log_file = open(out_dir + '/' + log_fn, 'wt') if log_fn else None

    # Repeats.
    for rep in range(repeats):

        # Optimize.
        ang = 0.0
        gl_avg = []
        util.init_uninitialized_vars()
        for it in range(max_iter + 1):
            # Initialize optimization: random perturbation of positions,
            # fully random colors.
            if it == 0:
                vtxp_init = np.random.uniform(-0.5, 0.5, size=vtxp.shape) + vtxp
                vtxc_init = np.random.uniform(0.0, 1.0, size=vtxc.shape)
                util.run(opt_set, {vtxc_opt_in: vtxc_init.astype(np.float32), vtxp_opt_in: vtxp_init.astype(np.float32)})

            # Learning rate ramp: exponential decay floored at 1% of base.
            lr = 1e-2
            lr = lr * max(0.01, 10**(-it*0.0005))

            # Random rotation/translation matrix for optimization.
            r_rot = util.random_rotation_translation(0.25)

            # Smooth rotation for display.
            a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))

            # Modelview and modelview + projection matrices.
            proj = util.projection(x=0.4)
            r_mv = np.matmul(util.translate(0, 0, -3.5), r_rot)
            r_mvp = np.matmul(proj, r_mv).astype(np.float32)
            a_mv = np.matmul(util.translate(0, 0, -3.5), a_rot)
            a_mvp = np.matmul(proj, a_mv).astype(np.float32)

            # Run training and measure geometric error.
            gl_val, _ = util.run([geom_loss, train_op], {mtx_in: r_mvp, lr_in: lr})
            gl_avg.append(gl_val)

            # Print/save log.
            if log_interval and (it % log_interval == 0):
                gl_val, gl_avg = np.mean(np.asarray(gl_avg)), []
                s = ("rep=%d," % rep) if repeats > 1 else ""
                s += "iter=%d,err=%f" % (it, gl_val)
                print(s)
                if log_file:
                    log_file.write(s + "\n")

            # Show/save image.
            display_image = display_interval and (it % display_interval == 0)
            save_image = imgsave_interval and (it % imgsave_interval == 0)

            if display_image or save_image:
                ang = ang + 0.1
                img_o = util.run(color_opt, {mtx_in: r_mvp})[0]
                img_b = util.run(color, {mtx_in: r_mvp})[0]
                img_d = util.run(color_disp, {mtx_in: a_mvp})[0]
                img_r = util.run(color_disp_ref, {mtx_in: a_mvp})[0]

                # Upscale the low-res training renders to display size by
                # nearest-neighbor repetition so all panels match.
                scl = display_res // img_o.shape[0]
                img_b = np.repeat(np.repeat(img_b, scl, axis=0), scl, axis=1)
                img_o = np.repeat(np.repeat(img_o, scl, axis=0), scl, axis=1)
                result_image = np.concatenate([img_o, img_b, img_d, img_r], axis=1)

                if display_image:
                    util.display_image(result_image, size=display_res, title='%d / %d' % (it, max_iter))
                if save_image:
                    util.save_image(out_dir + '/' + (imgsave_fn % it), result_image)

    # All repeats done.
    if log_file:
        log_file.close()

#----------------------------------------------------------------------------
# Main function.
#----------------------------------------------------------------------------

def main():
    """Parse command-line arguments and launch the cube fitting run."""
    display_interval = 0
    discontinuous = False
    resolution = 0

    def usage():
        # Print the usage string and terminate the process.
        print("Usage: python cube.py [-v] [-discontinuous] resolution")
        exit()

    # Flags may appear in any order; the resolution is the only positional
    # argument and must be a positive decimal number.
    for arg in sys.argv[1:]:
        if arg.isdecimal():
            resolution = int(arg)
        elif arg == '-discontinuous':
            discontinuous = True
        elif arg == '-v':
            display_interval = 100
        else:
            usage()

    if resolution <= 0:
        usage()

    # Initialize TensorFlow.
    util.init_tf()

    # Run.
    out_dir = 'out/cube_%s_%d' % (('d' if discontinuous else 'c'), resolution)
    fit_cube(max_iter=5000, resolution=resolution, discontinuous=discontinuous, log_interval=10, display_interval=display_interval, out_dir=out_dir, log_fn='log.txt', imgsave_interval=1000, imgsave_fn='img_%06d.png')

    # Done.
    print("Done.")

#----------------------------------------------------------------------------

if __name__ == "__main__":
    main()

#----------------------------------------------------------------------------
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.

import numpy as np
import tensorflow as tf
import os
import sys
import pathlib

import util

sys.path.insert(0, os.path.join(sys.path[0], '../..')) # for nvdiffrast
import nvdiffrast.tensorflow as dr

#----------------------------------------------------------------------------
# Texture learning with/without mipmaps.
#----------------------------------------------------------------------------

def fit_earth(max_iter = 20000,
              log_interval = 10,
              display_interval = None,
              display_res = 1024,
              enable_mip = True,
              res = 512,
              ref_res = 4096,
              lr_base = 1e-2,
              lr_ramp = 0.1,
              out_dir = '.',
              log_fn = None,
              texsave_interval = None,
              texsave_fn = None,
              imgsave_interval = None,
              imgsave_fn = None):
    """Learn a texture for the Earth mesh from high-resolution references.

    Renders a reference at ref_res with the known texture, downsamples it to
    res, and fits a learned texture by minimizing image-space L2 loss, with or
    without trilinear mipmapping in the candidate render.

    Args:
      max_iter:         Number of optimization iterations.
      log_interval:     Iterations between log prints; falsy disables.
      display_interval: Iterations between interactive displays; falsy disables.
      display_res:      Display window size.
      enable_mip:       Use mipmapped texture sampling for the candidate.
      res:              Candidate render / training resolution.
      ref_res:          Reference render resolution before downsampling.
      lr_base:          Base learning rate.
      lr_ramp:          Learning-rate decay factor applied over max_iter.
      out_dir:          Output directory, created if missing.
      log_fn:           Log file name inside out_dir, or None.
      texsave_interval: Iterations between texture snapshots; falsy disables.
      texsave_fn:       Texture file name pattern with one %d slot.
      imgsave_interval: Iterations between image saves; falsy disables.
      imgsave_fn:       Image file name pattern with one %d slot.
    """

    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Mesh and texture adapted from "3D Earth Photorealistic 2K" model at
    # https://www.turbosquid.com/3d-models/3d-realistic-earth-photorealistic-2k-1279125
    # Array order inside the .npz is assumed (pos_idx, pos, uv_idx, uv, tex).
    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    with np.load(f'{datadir}/earth.npz') as f:
        pos_idx, pos, uv_idx, uv, tex = f.values()
    tex = tex.astype(np.float32)/255.0
    max_mip_level = 9 # Texture is a 4x3 atlas of 512x512 maps.
    print("Mesh has %d triangles and %d vertices." % (pos_idx.shape[0], pos.shape[0]))

    # Transformation matrix input to TF graph.
    mtx_in = tf.placeholder(tf.float32, [4, 4])

    # Learned texture.
    tex_var = tf.get_variable('tex', initializer=tf.constant_initializer(0.2), shape=tex.shape)

    # Setup TF graph for reference rendering in high resolution.
    pos_clip = tf.matmul(pos, mtx_in, transpose_b=True)[tf.newaxis, ...]
    rast_out, rast_out_db = dr.rasterize(pos_clip, pos_idx, [ref_res, ref_res])
    texc, texd = dr.interpolate(uv[tf.newaxis, ...], rast_out, uv_idx, rast_db=rast_out_db, diff_attrs='all')
    color = dr.texture(tex[np.newaxis], texc, texd, filter_mode='linear-mipmap-linear', max_mip_level=max_mip_level)
    color = color * tf.clip_by_value(rast_out[..., -1:], 0, 1) # Mask out background.

    # Reduce the reference to correct size.
    while color.shape[1] > res:
        color = util.bilinear_downsample(color)

    # TF Graph for rendered candidate.
    if enable_mip:
        # With mipmaps.
        rast_out_opt, rast_out_db_opt = dr.rasterize(pos_clip, pos_idx, [res, res])
        texc_opt, texd_opt = dr.interpolate(uv[tf.newaxis, ...], rast_out_opt, uv_idx, rast_db=rast_out_db_opt, diff_attrs='all')
        color_opt = dr.texture(tex_var[np.newaxis], texc_opt, texd_opt, filter_mode='linear-mipmap-linear', max_mip_level=max_mip_level)
    else:
        # No mipmaps: no image-space derivatives anywhere.
        rast_out_opt, _ = dr.rasterize(pos_clip, pos_idx, [res, res], output_db=False)
        texc_opt, _ = dr.interpolate(uv[tf.newaxis, ...], rast_out_opt, uv_idx)
        color_opt = dr.texture(tex_var[np.newaxis], texc_opt, filter_mode='linear')
    color_opt = color_opt * tf.clip_by_value(rast_out_opt[..., -1:], 0, 1) # Mask out background.

    # Measure only relevant portions of texture when calculating texture PSNR.
    loss = tf.reduce_mean((color - color_opt)**2)
    texmask = np.zeros_like(tex)
    tr = tex.shape[1]//4
    texmask[tr+13:2*tr-13, 25:-25, :] += 1.0
    texmask[25:-25, tr+13:2*tr-13, :] += 1.0
    texloss = (tf.reduce_sum(texmask * (tex - tex_var)**2)/np.sum(texmask))**0.5 # RMSE within masked area.

    # Training driven by image-space loss.
    lr_in = tf.placeholder(tf.float32, [])
    train_op = tf.train.AdamOptimizer(lr_in, 0.9, 0.99).minimize(loss, var_list=[tex_var])

    # Open log file.
    log_file = open(out_dir + '/' + log_fn, 'wt') if log_fn else None

    # Render.
    ang = 0.0
    util.init_uninitialized_vars()
    texloss_avg = []
    for it in range(max_iter + 1):
        lr = lr_base * lr_ramp**(float(it)/float(max_iter))

        # Random rotation/translation matrix for optimization.
        r_rot = util.random_rotation_translation(0.25)

        # Smooth rotation for display.
        ang = ang + 0.01
        a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))
        dist = np.random.uniform(0.0, 48.5)

        # Modelview and modelview + projection matrices. Training uses a
        # random viewing distance so multiple mip levels get exercised.
        proj = util.projection(x=0.4, n=1.0, f=200.0)
        r_mv = np.matmul(util.translate(0, 0, -1.5-dist), r_rot)
        r_mvp = np.matmul(proj, r_mv).astype(np.float32)
        a_mv = np.matmul(util.translate(0, 0, -3.5), a_rot)
        a_mvp = np.matmul(proj, a_mv).astype(np.float32)

        # Run training and measure texture-space RMSE loss.
        texloss_val, _ = util.run([texloss, train_op], {mtx_in: r_mvp, lr_in: lr})
        texloss_avg.append(texloss_val)

        # Print/save log.
        if log_interval and (it % log_interval == 0):
            texloss_val, texloss_avg = np.mean(np.asarray(texloss_avg)), []
            psnr = -10.0 * np.log10(texloss_val**2) # PSNR based on average RMSE.
            s = "iter=%d,loss=%f,psnr=%f" % (it, texloss_val, psnr)
            print(s)
            if log_file:
                log_file.write(s + '\n')

        # Show/save result images/textures.
        display_image = display_interval and (it % display_interval) == 0
        save_image = imgsave_interval and (it % imgsave_interval) == 0
        save_texture = texsave_interval and (it % texsave_interval) == 0

        if display_image or save_image:
            result_image = util.run(color_opt, {mtx_in: a_mvp})[0]
            if display_image:
                util.display_image(result_image, size=display_res, title='%d / %d' % (it, max_iter))
            if save_image:
                util.save_image(out_dir + '/' + (imgsave_fn % it), result_image)
        if save_texture:
            # Texture rows are flipped for saving.
            util.save_image(out_dir + '/' + (texsave_fn % it), util.run(tex_var)[::-1])

    # Done.
    if log_file:
        log_file.close()

#----------------------------------------------------------------------------
# Main function.
#----------------------------------------------------------------------------

def main():
    """Parse command-line arguments and launch the texture fitting run."""
    display_interval = 0
    enable_mip = None

    def usage():
        print("Usage: python earth.py [-v] [-mip|-nomip]")
        exit()

    for a in sys.argv[1:]:
        if a == '-v': display_interval = 10
        elif a == '-mip': enable_mip = True
        elif a == '-nomip': enable_mip = False
        else: usage()

    # Choosing mip vs. nomip is mandatory.
    if enable_mip is None:
        usage()

    # Initialize TensorFlow.
    util.init_tf()

    # Run.
    out_dir = 'out/earth_mip' if enable_mip else 'out/earth_nomip'
    fit_earth(max_iter=20000, log_interval=10, display_interval=display_interval, enable_mip=enable_mip, out_dir=out_dir, log_fn='log.txt', texsave_interval=1000, texsave_fn='tex_%06d.png', imgsave_interval=1000, imgsave_fn='img_%06d.png')

    # Done.
179 | print("Done.") 180 | 181 | #---------------------------------------------------------------------------- 182 | 183 | if __name__ == "__main__": 184 | main() 185 | 186 | #---------------------------------------------------------------------------- 187 | -------------------------------------------------------------------------------- /samples/tensorflow/envphong.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import numpy as np 10 | import tensorflow as tf 11 | import os 12 | import sys 13 | import pathlib 14 | 15 | import util 16 | 17 | sys.path.insert(0, os.path.join(sys.path[0], '../..')) # for nvdiffrast 18 | import nvdiffrast.tensorflow as dr 19 | 20 | #---------------------------------------------------------------------------- 21 | # Environment map and Phong BRDF learning. 
#----------------------------------------------------------------------------

def fit_env_phong(max_iter = 1000,
                  log_interval = 10,
                  display_interval = None,
                  display_res = 1024,
                  res = 1024,
                  lr_base = 1e-2,
                  lr_ramp = 1.0,
                  out_dir = '.',
                  log_fn = None,
                  imgsave_interval = None,
                  imgsave_fn = None):
    """Jointly learn a cube environment map and Phong BRDF parameters.

    Renders a reference with the known environment map and Phong parameters,
    then fits a learned environment map plus (R, G, B, exponent) Phong vector
    by minimizing image-space L2 loss with Adam.

    Args:
      max_iter:         Number of optimization iterations.
      log_interval:     Iterations between log prints; falsy disables.
      display_interval: Iterations between interactive displays; falsy disables.
      display_res:      Display window size.
      res:              Render resolution.
      lr_base:          Base learning rate.
      lr_ramp:          Learning-rate decay factor applied over max_iter.
      out_dir:          Output directory, created if missing.
      log_fn:           Log file name inside out_dir, or None.
      imgsave_interval: Iterations between image saves; falsy disables.
      imgsave_fn:       Image file name pattern with one %d slot.
    """

    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Texture adapted from https://github.com/WaveEngine/Samples/tree/master/Materials/EnvironmentMap/Content/Assets/CubeMap.cubemap
    # Array order inside the .npz is assumed (pos_idx, pos, normals, env).
    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    with np.load(f'{datadir}/envphong.npz') as f:
        pos_idx, pos, normals, env = f.values()
    env = env.astype(np.float32)/255.0
    print("Mesh has %d triangles and %d vertices." % (pos_idx.shape[0], pos.shape[0]))

    # Target Phong parameters.
    phong_rgb = np.asarray([1.0, 0.8, 0.6], np.float32)
    phong_exp = 25.0

    # Inputs to TF graph.
    mtx_in = tf.placeholder(tf.float32, [4, 4])
    invmtx_in = tf.placeholder(tf.float32, [4, 4]) # Inverse.
    campos_in = tf.placeholder(tf.float32, [3]) # Camera position in world space.
    lightdir_in = tf.placeholder(tf.float32, [3]) # Light direction.

    # Learned variables: environment maps, phong color, phong exponent.
    env_var = tf.get_variable('env_var', initializer=tf.constant_initializer(0.5), shape=env.shape)
    phong_var_raw = tf.get_variable('phong_var', initializer=tf.random_uniform_initializer(0.0, 1.0), shape=[4]) # R, G, B, exp.
    phong_var = phong_var_raw * [1.0, 1.0, 1.0, 10.0] # Faster learning rate for the exponent.

    # Transform and rasterize.
    viewvec = pos[..., :3] - campos_in[np.newaxis, np.newaxis, :] # View vectors at vertices.
    reflvec = viewvec - 2.0 * normals[tf.newaxis, ...] * tf.reduce_sum(normals[tf.newaxis, ...] * viewvec, axis=-1, keepdims=True) # Reflection vectors at vertices.
    reflvec = reflvec / tf.reduce_sum(reflvec**2, axis=-1, keepdims=True)**0.5 # Normalize.
    pos_clip = tf.matmul(pos, mtx_in, transpose_b=True)[tf.newaxis, ...]
    rast_out, rast_out_db = dr.rasterize(pos_clip, pos_idx, [res, res])
    refl, refld = dr.interpolate(reflvec, rast_out, pos_idx, rast_db=rast_out_db, diff_attrs='all') # Interpolated reflection vectors.

    # Phong light.
    refl = refl / tf.reduce_sum(refl**2, axis=-1, keepdims=True)**0.5 # Normalize.
    ldotr = tf.reduce_sum(-lightdir_in * refl, axis=-1, keepdims=True) # L dot R.

    # Reference color. No need for AA because we are not learning geometry.
    env = np.stack(env)[:, ::-1]
    color = dr.texture(env[np.newaxis, ...], refl, refld, filter_mode='linear-mipmap-linear', boundary_mode='cube')
    color = tf.reduce_sum(tf.stack(color), axis=0)
    color = color + phong_rgb * tf.maximum(0.0, ldotr) ** phong_exp # Phong.
    color = tf.maximum(color, 1.0 - tf.clip_by_value(rast_out[..., -1:], 0, 1)) # White background.

    # Candidate rendering same up to this point, but uses learned texture and Phong parameters instead.
    color_opt = dr.texture(env_var[tf.newaxis, ...], refl, uv_da=refld, filter_mode='linear-mipmap-linear', boundary_mode='cube')
    color_opt = tf.reduce_sum(tf.stack(color_opt), axis=0)
    color_opt = color_opt + phong_var[:3] * tf.maximum(0.0, ldotr) ** phong_var[3] # Phong.
    color_opt = tf.maximum(color_opt, 1.0 - tf.clip_by_value(rast_out[..., -1:], 0, 1)) # White background.

    # Training.
    loss = tf.reduce_mean((color - color_opt)**2) # L2 pixel loss.
    lr_in = tf.placeholder(tf.float32, [])
    train_op = tf.train.AdamOptimizer(lr_in, 0.9, 0.99).minimize(loss, var_list=[env_var, phong_var_raw])

    # Open log file.
    log_file = open(out_dir + '/' + log_fn, 'wt') if log_fn else None

    # Render.
    ang = 0.0
    util.init_uninitialized_vars()
    imgloss_avg, phong_avg = [], []
    for it in range(max_iter + 1):
        lr = lr_base * lr_ramp**(float(it)/float(max_iter))

        # Random rotation/translation matrix for optimization.
        r_rot = util.random_rotation_translation(0.25)

        # Smooth rotation for display.
        ang = ang + 0.01
        a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))

        # Modelview and modelview + projection matrices.
        proj = util.projection(x=0.4, n=1.0, f=200.0)
        r_mv = np.matmul(util.translate(0, 0, -3.5), r_rot)
        r_mvp = np.matmul(proj, r_mv).astype(np.float32)
        a_mv = np.matmul(util.translate(0, 0, -3.5), a_rot)
        a_mvp = np.matmul(proj, a_mv).astype(np.float32)

        # Solve camera positions (last column of the inverse modelview).
        a_campos = np.linalg.inv(a_mv)[:3, 3]
        r_campos = np.linalg.inv(r_mv)[:3, 3]

        # Random light direction.
        lightdir = np.random.normal(size=[3])
        lightdir /= np.linalg.norm(lightdir) + 1e-8

        # Run training and measure image-space RMSE loss.
        imgloss_val, phong_val, _ = util.run([loss, phong_var, train_op], {mtx_in: r_mvp, invmtx_in: np.linalg.inv(r_mvp), campos_in: r_campos, lightdir_in: lightdir, lr_in: lr})
        imgloss_avg.append(imgloss_val**0.5)
        phong_avg.append(phong_val)

        # Print/save log.
        if log_interval and (it % log_interval == 0):
            imgloss_val, imgloss_avg = np.mean(np.asarray(imgloss_avg, np.float32)), []
            phong_val, phong_avg = np.mean(np.asarray(phong_avg, np.float32), axis=0), []
            phong_rgb_rmse = np.mean((phong_val[:3] - phong_rgb)**2)**0.5
            phong_exp_rel_err = np.abs(phong_val[3] - phong_exp)/phong_exp
            s = "iter=%d,phong_rgb_rmse=%f,phong_exp_rel_err=%f,img_rmse=%f" % (it, phong_rgb_rmse, phong_exp_rel_err, imgloss_val)
            print(s)
            if log_file:
                log_file.write(s + '\n')

        # Show/save result image.
        display_image = display_interval and (it % display_interval == 0)
        save_image = imgsave_interval and (it % imgsave_interval == 0)

        if display_image or save_image:
            result_image = util.run(color_opt, {mtx_in: a_mvp, invmtx_in: np.linalg.inv(a_mvp), campos_in: a_campos, lightdir_in: lightdir})[0]
            if display_image:
                util.display_image(result_image, size=display_res, title='%d / %d' % (it, max_iter))
            if save_image:
                util.save_image(out_dir + '/' + (imgsave_fn % it), result_image)

    # Done.
    if log_file:
        log_file.close()

#----------------------------------------------------------------------------
# Main function.
#----------------------------------------------------------------------------

def main():
    """Parse command-line arguments and launch the env/Phong fitting run."""
    display_interval = 0
    for a in sys.argv[1:]:
        if a == '-v':
            display_interval = 10
        else:
            print("Usage: python envphong.py [-v]")
            exit()

    # Initialize TensorFlow.
    util.init_tf()

    # Run.
    fit_env_phong(max_iter=1500, log_interval=10, display_interval=display_interval, out_dir='out/env_phong', log_fn='log.txt', imgsave_interval=100, imgsave_fn='img_%06d.png')

    # Done.
    print("Done.")

#----------------------------------------------------------------------------

if __name__ == "__main__":
    main()

#----------------------------------------------------------------------------
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto.
# Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.

import numpy as np
import tensorflow as tf
import os
import sys
import util
import pathlib

sys.path.insert(0, os.path.join(sys.path[0], '../..')) # for nvdiffrast
import nvdiffrast.tensorflow as dr

#----------------------------------------------------------------------------
# Quaternion math.
#----------------------------------------------------------------------------

# Identity quaternion (1, 0, 0, 0) as float32.
def q_unit():
    q = np.zeros(4, np.float32)
    q[0] = 1.0
    return q

# Draw a uniformly distributed random unit quaternion.
def q_rnd():
    u, v, w = np.random.uniform(0.0, 1.0, size=[3])
    a = 2.0 * np.pi * v
    b = 2.0 * np.pi * w
    s = (1.0 - u)**0.5
    t = u**0.5
    return np.asarray([s * np.sin(a), s * np.cos(a), t * np.sin(b), t * np.cos(b)], np.float32)

# Get a random quaternion from the octahedral symmetric group S_4.
_r2 = 0.5**0.5
_q_S4 = [[ 1.0, 0.0, 0.0, 0.0], [ 0.0, 1.0, 0.0, 0.0], [ 0.0, 0.0, 1.0, 0.0], [ 0.0, 0.0, 0.0, 1.0],
         [-0.5, 0.5, 0.5, 0.5], [-0.5,-0.5,-0.5, 0.5], [ 0.5,-0.5, 0.5, 0.5], [ 0.5, 0.5,-0.5, 0.5],
         [ 0.5, 0.5, 0.5, 0.5], [-0.5, 0.5,-0.5, 0.5], [ 0.5,-0.5,-0.5, 0.5], [-0.5,-0.5, 0.5, 0.5],
         [ _r2,-_r2, 0.0, 0.0], [ _r2, _r2, 0.0, 0.0], [ 0.0, 0.0, _r2, _r2], [ 0.0, 0.0,-_r2, _r2],
         [ 0.0, _r2, _r2, 0.0], [ _r2, 0.0, 0.0,-_r2], [ _r2, 0.0, 0.0, _r2], [ 0.0,-_r2, _r2, 0.0],
         [ _r2, 0.0, _r2, 0.0], [ 0.0, _r2, 0.0, _r2], [ _r2, 0.0,-_r2, 0.0], [ 0.0,-_r2, 0.0, _r2]]
def q_rnd_S4():
    idx = np.random.randint(24)
    return np.asarray(_q_S4[idx], np.float32)

# Quaternion slerp.
def q_slerp(p, q, t):
    """Spherical linear interpolation between quaternions p and q at t."""
    d = np.dot(p, q)
    # Take the shorter arc.
    if d < 0.0:
        q = -q
        d = -d
    # Nearly parallel: fall back to normalized lerp to avoid division by a
    # vanishing sin(t0).
    if d > 0.999:
        a = p + t * (q-p)
        return a / np.linalg.norm(a)
    t0 = np.arccos(d)
    tt = t0 * t
    st = np.sin(tt)
    st0 = np.sin(t0)
    s1 = st / st0
    s0 = np.cos(tt) - d*s1
    return s0*p + s1*q

# Quaterion scale (slerp vs. identity quaternion).
def q_scale(q, scl):
    return q_slerp(q_unit(), q, scl)

# Quaternion product.
def q_mul(p, q):
    s1, V1 = p[0], p[1:]
    s2, V2 = q[0], q[1:]
    s = s1*s2 - np.dot(V1, V2)
    V = s1*V2 + s2*V1 + np.cross(V1, V2)
    return np.asarray([s, V[0], V[1], V[2]], np.float32)

# Angular difference between two quaternions in degrees.
def q_angle_deg(p, q):
    d = np.abs(np.dot(p, q))
    d = min(d, 1.0) # Clamp to guard arccos against numerical overshoot.
    return np.degrees(2.0 * np.arccos(d))

# Quaternion product in TensorFlow.
def q_mul_tf(p, q):
    a = p[0]*q[0] - p[1]*q[1] - p[2]*q[2] - p[3]*q[3]
    b = p[0]*q[1] + p[1]*q[0] + p[2]*q[3] - p[3]*q[2]
    c = p[0]*q[2] + p[2]*q[0] + p[3]*q[1] - p[1]*q[3]
    d = p[0]*q[3] + p[3]*q[0] + p[1]*q[2] - p[2]*q[1]
    return tf.stack([a, b, c, d])

# Convert quaternion to 4x4 rotation matrix. TensorFlow.
def q_to_mtx_tf(q):
    r0 = tf.stack([1.0-2.0*q[1]**2 - 2.0*q[2]**2, 2.0*q[0]*q[1] - 2.0*q[2]*q[3], 2.0*q[0]*q[2] + 2.0*q[1]*q[3]])
    r1 = tf.stack([2.0*q[0]*q[1] + 2.0*q[2]*q[3], 1.0 - 2.0*q[0]**2 - 2.0*q[2]**2, 2.0*q[1]*q[2] - 2.0*q[0]*q[3]])
    r2 = tf.stack([2.0*q[0]*q[2] - 2.0*q[1]*q[3], 2.0*q[1]*q[2] + 2.0*q[0]*q[3], 1.0 - 2.0*q[0]**2 - 2.0*q[1]**2])
    rr = tf.transpose(tf.stack([r0, r1, r2]), [1, 0])
    rr = tf.concat([rr, tf.convert_to_tensor([[0], [0], [0]], tf.float32)], axis=1) # Pad right column.
    rr = tf.concat([rr, tf.convert_to_tensor([[0, 0, 0, 1]], tf.float32)], axis=0) # Pad bottom row.
    return rr

#----------------------------------------------------------------------------
# Cube pose fitter.
#----------------------------------------------------------------------------

def fit_pose(max_iter = 10000,
             repeats = 1,
             log_interval = 10,
             display_interval = None,
             display_res = 512,
             lr_base = 0.01,
             lr_falloff = 1.0,
             nr_base = 1.0,
             nr_falloff = 1e-4,
             grad_phase_start = 0.5,
             resolution = 256,
             out_dir = '.',
             log_fn = None,
             imgsave_interval = None,
             imgsave_fn = None):
    """Recover the pose (rotation quaternion) of a cube from rendered images.

    Runs a two-phase search per repeat: a greedy random-perturbation phase
    (mollification noise on the pose, keeping the best pose seen), followed by
    a gradient phase starting at fraction grad_phase_start of max_iter.

    Args:
      max_iter:         Iterations per repeat.
      repeats:          Number of independent runs with fresh random targets.
      log_interval:     Iterations between log prints; falsy disables.
      display_interval: Iterations between interactive displays; falsy disables.
      display_res:      Display/saved image size.
      lr_base:          Base learning rate for the gradient phase.
      lr_falloff:       Learning-rate decay factor over the run.
      nr_base:          Base noise magnitude for the greedy phase.
      nr_falloff:       Noise magnitude decay factor over the run.
      grad_phase_start: Fraction of max_iter at which gradient phase begins.
      resolution:       Rasterization resolution.
      out_dir:          Output directory, created if missing.
      log_fn:           Log file name inside out_dir, or None.
      imgsave_interval: Iterations between image saves; falsy disables.
      imgsave_fn:       Image file name pattern with %d slots for (rep, it).
    """

    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Array order inside the .npz is assumed (pos_idx, pos, col_idx, col).
    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    with np.load(f'{datadir}/cube_p.npz') as f:
        pos_idx, pos, col_idx, col = f.values()
    print("Mesh has %d triangles and %d vertices." % (pos_idx.shape[0], pos.shape[0]))

    # Transformation matrix input to TF graph.
    mtx_in = tf.placeholder(tf.float32, [4, 4])

    # Pose matrix input to TF graph.
    pose_in = tf.placeholder(tf.float32, [4]) # Quaternion.
    noise_in = tf.placeholder(tf.float32, [4]) # Mollification noise.

    # Setup TF graph for reference.
    mtx_total = tf.matmul(mtx_in, q_to_mtx_tf(pose_in))
    pos_clip = tf.matmul(pos, mtx_total, transpose_b=True)[tf.newaxis, ...]
    rast_out, _ = dr.rasterize(pos_clip, pos_idx, resolution=[resolution, resolution], output_db=False)
    color, _ = dr.interpolate(col[tf.newaxis, ...], rast_out, col_idx)
    color = dr.antialias(color, rast_out, pos_clip, pos_idx)

    # Setup TF graph for optimization candidate.
    pose_var = tf.get_variable('pose', initializer=tf.zeros_initializer(), shape=[4])
    pose_var_in = tf.placeholder(tf.float32, [4])
    pose_set = tf.assign(pose_var, pose_var_in)
    pose_norm_op = tf.assign(pose_var, pose_var / tf.reduce_sum(pose_var**2)**0.5) # Normalization operation.
    pose_total = q_mul_tf(pose_var, noise_in)
    mtx_total_opt = tf.matmul(mtx_in, q_to_mtx_tf(pose_total))
    pos_clip_opt = tf.matmul(pos, mtx_total_opt, transpose_b=True)[tf.newaxis, ...]
    rast_out_opt, _ = dr.rasterize(pos_clip_opt, pos_idx, resolution=[resolution, resolution], output_db=False)
    color_opt, _ = dr.interpolate(col[tf.newaxis, ...], rast_out_opt, col_idx)
    color_opt = dr.antialias(color_opt, rast_out_opt, pos_clip_opt, pos_idx)

    # Image-space loss.
    diff = (color_opt - color)**2 # L2 norm.
    diff = tf.tanh(5.0 * tf.reduce_max(diff, axis=-1)) # Add some oomph to the loss.
    loss = tf.reduce_mean(diff)
    lr_in = tf.placeholder(tf.float32, [])
    train_op = tf.train.AdamOptimizer(lr_in, 0.9, 0.999).minimize(loss, var_list=[pose_var])

    # Open log file.
    log_file = open(out_dir + '/' + log_fn, 'wt') if log_fn else None

    # Repeats.
    for rep in range(repeats):

        # Optimize.
        util.init_uninitialized_vars()
        loss_best = np.inf
        pose_best = None
        for it in range(max_iter + 1):
            # Modelview + projection matrix.
            mvp = np.matmul(util.projection(x=0.4), util.translate(0, 0, -3.5)).astype(np.float32)

            # Learning and noise rate scheduling.
            itf = 1.0 * it / max_iter
            lr = lr_base * lr_falloff**itf
            nr = nr_base * nr_falloff**itf

            # Noise input: identity during the gradient phase, random
            # mollification + orientation noise during the greedy phase.
            if itf >= grad_phase_start:
                noise = q_unit()
            else:
                noise = q_scale(q_rnd(), nr)
                noise = q_mul(noise, q_rnd_S4()) # Orientation noise.

            # Initialize optimization.
            if it == 0:
                pose_target = q_rnd()
                util.run(pose_set, {pose_var_in: q_rnd()})
                util.run(pose_norm_op)
                util.run(loss, {mtx_in: mvp, pose_in: pose_target, noise_in: noise}) # Pipecleaning pass.

            # Run gradient training step.
            if itf >= grad_phase_start:
                util.run(train_op, {mtx_in: mvp, pose_in: pose_target, noise_in: noise, lr_in: lr})
                util.run(pose_norm_op)

            # Measure image-space loss and update best found pose.
            loss_val = util.run(loss, {mtx_in: mvp, pose_in: pose_target, noise_in: noise, lr_in: lr})
            if loss_val < loss_best:
                pose_best = util.run(pose_total, {noise_in: noise})
                if loss_val > 0.0:
                    loss_best = loss_val
            else:
                # Return to best pose in the greedy phase.
                if itf < grad_phase_start:
                    util.run(pose_set, {pose_var_in: pose_best})

            # Print/save log.
            if log_interval and (it % log_interval == 0):
                err = q_angle_deg(util.run(pose_var), pose_target)
                ebest = q_angle_deg(pose_best, pose_target)
                s = "rep=%d,iter=%d,err=%f,err_best=%f,loss=%f,loss_best=%f,lr=%f,nr=%f" % (rep, it, err, ebest, loss_val, loss_best, lr, nr)
                print(s)
                if log_file:
                    log_file.write(s + "\n")

            # Show/save image.
            display_image = display_interval and (it % display_interval == 0)
            save_image = imgsave_interval and (it % imgsave_interval == 0)

            if display_image or save_image:
                img_ref, img_opt = util.run([color, color_opt], {mtx_in: mvp, pose_in: pose_target, noise_in: noise})
                img_best, = util.run([color_opt], {mtx_in: mvp, pose_in: pose_best, noise_in: q_unit()})
                img_ref = img_ref[0]
                img_opt = img_opt[0]
                img_best = img_best[0]
                result_image = np.concatenate([img_ref, img_best, img_opt], axis=1)

                if display_image:
                    util.display_image(result_image, size=display_res, title='(%d) %d / %d' % (rep, it, max_iter))
                if save_image:
                    util.save_image(out_dir + '/' + (imgsave_fn % (rep, it)), result_image)

    # All repeats done.
    if log_file:
        log_file.close()

#----------------------------------------------------------------------------
# Main function.
#----------------------------------------------------------------------------

def main():
    """Parse command-line flags and run the pose fitter."""
    display_interval = 0
    repeats = 1

    def usage():
        print("Usage: python pose.py [-v] [repeats]")
        exit()

    # Accept an optional '-v' flag (interactive display) and a repeat count.
    for arg in sys.argv[1:]:
        if arg == '-v':
            display_interval = 10
            continue
        if arg.isascii() and arg.isdecimal():
            repeats = int(arg)
            continue
        usage()

    # Zero or negative repeat counts make no sense.
    if repeats <= 0:
        usage()

    # Initialize TensorFlow.
    util.init_tf()

    # Run.
    fit_pose(max_iter=1000, repeats=repeats, log_interval=100, display_interval=display_interval, out_dir='out/pose', log_fn='log.txt', imgsave_interval=1000, imgsave_fn='img_%03d_%06d.png')

    # Done.
    print("Done.")

#----------------------------------------------------------------------------

if __name__ == "__main__":
    main()

#----------------------------------------------------------------------------

# ---------------------------------------------------------------------------
# /samples/tensorflow/triangle.py
# ---------------------------------------------------------------------------

# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.

import imageio
import logging
import os
import numpy as np
import tensorflow as tf
import nvdiffrast.tensorflow as dr

# Quiet TensorFlow: drop deprecation chatter and debug-level C++ logging.
logging.getLogger('tensorflow').setLevel(logging.ERROR)
os.environ.setdefault('TF_CPP_MIN_LOG_LEVEL', '1')

# One triangle: clip-space positions (w=1), per-vertex RGB, one index triple.
pos = tf.convert_to_tensor([[[-0.8, -0.8, 0, 1], [0.8, -0.8, 0, 1], [-0.8, 0.8, 0, 1]]], dtype=tf.float32)
col = tf.convert_to_tensor([[[1, 0, 0], [0, 1, 0], [0, 0, 1]]], dtype=tf.float32)
tri = tf.convert_to_tensor([[0, 1, 2]], dtype=tf.int32)

# Rasterize the triangle and interpolate the vertex colors across it.
rast, _ = dr.rasterize(pos, tri, resolution=[256, 256])
out, _ = dr.interpolate(col, rast, tri)

with tf.Session() as sess:
    img = sess.run(out)

# Rendered images are bottom-up; flip to top-down, then quantize for PNG.
img = img[0, ::-1, :, :] # Flip vertically.
img = np.clip(np.rint(img * 255), 0, 255).astype(np.uint8) # Quantize to np.uint8

print("Saving to 'tri.png'.")
imageio.imsave('tri.png', img)

# ---------------------------------------------------------------------------
# /samples/tensorflow/util.py
# ---------------------------------------------------------------------------

# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.

import os
import numpy as np
import tensorflow as tf

# Silence deprecation warnings from TensorFlow 1.13 onwards
import logging
logging.getLogger('tensorflow').setLevel(logging.ERROR)

from typing import Any, List

#----------------------------------------------------------------------------
# Projection and transformation matrix helpers.
#----------------------------------------------------------------------------

def projection(x=0.1, n=1.0, f=50.0):
    """OpenGL-style perspective projection with near plane n, far plane f."""
    return np.array([[n/x,    0,            0,              0],
                     [  0, n/-x,            0,              0],
                     [  0,    0, -(f+n)/(f-n), -(2*f*n)/(f-n)],
                     [  0,    0,           -1,              0]], dtype=np.float32)

def translate(x, y, z):
    """Homogeneous translation by (x, y, z)."""
    return np.array([[1, 0, 0, x],
                     [0, 1, 0, y],
                     [0, 0, 1, z],
                     [0, 0, 0, 1]], dtype=np.float32)

def rotate_x(a):
    """Rotation by angle a (radians) about the x axis."""
    sin_a, cos_a = np.sin(a), np.cos(a)
    return np.array([[1,      0,     0, 0],
                     [0,  cos_a, sin_a, 0],
                     [0, -sin_a, cos_a, 0],
                     [0,      0,     0, 1]], dtype=np.float32)

def rotate_y(a):
    """Rotation by angle a (radians) about the y axis."""
    sin_a, cos_a = np.sin(a), np.cos(a)
    return np.array([[ cos_a, 0, sin_a, 0],
                     [     0, 1,     0, 0],
                     [-sin_a, 0, cos_a, 0],
                     [     0, 0,     0, 1]], dtype=np.float32)

def random_rotation_translation(t):
    """Random rigid 4x4 transform: orthonormal rotation + translation in [-t, t]^3."""
    # Orthonormalize a random 3x3 via two cross products + row normalization.
    m = np.random.normal(size=[3, 3])
    m[1] = np.cross(m[0], m[2])
    m[2] = np.cross(m[0], m[1])
    m = m / np.linalg.norm(m, axis=1, keepdims=True)
    # Embed into homogeneous 4x4 and add the random translation column.
    m = np.pad(m, [[0, 1], [0, 1]], mode='constant')
    m[3, 3] = 1.0
    m[:3, 3] = np.random.uniform(-t, t, size=[3])
    return m

#----------------------------------------------------------------------------
# Bilinear downsample by 2x.
#----------------------------------------------------------------------------

def bilinear_downsample(x):
    """Downsample an NHWC tensor by 2x with a 4x4 bilinear kernel."""
    # Separable [1 3 3 1] binomial filter, normalized to sum to one, applied
    # per channel through an identity mixing matrix.
    w = tf.constant([[1, 3, 3, 1], [3, 9, 9, 3], [3, 9, 9, 3], [1, 3, 3, 1]], dtype=tf.float32) / 64.0
    w = w[..., tf.newaxis, tf.newaxis] * tf.eye(x.shape[-1].value, batch_shape=[1, 1])
    return tf.nn.conv2d(x, w, strides=2, padding='SAME')

#----------------------------------------------------------------------------
# Image display function using OpenGL.
#----------------------------------------------------------------------------

_glfw_window = None
def display_image(image, zoom=None, size=None, title=None): # HWC
    """Show an HWC image in a reusable glfw window.

    Returns False once the user has asked the window to close, else True.
    """
    # Import OpenGL and glfw.
    import OpenGL.GL as gl
    import glfw

    # Zoom image if requested.
    image = np.asarray(image)
    if size is not None:
        assert zoom is None
        zoom = max(1, size // image.shape[0])
    if zoom is not None:
        image = image.repeat(zoom, axis=0).repeat(zoom, axis=1)
    height, width, channels = image.shape

    # Lazily create the window on first use; afterwards just retitle/resize.
    global _glfw_window
    if title is None:
        title = 'Debug window'
    if _glfw_window is None:
        glfw.init()
        _glfw_window = glfw.create_window(width, height, title, None, None)
        glfw.make_context_current(_glfw_window)
        glfw.show_window(_glfw_window)
        glfw.swap_interval(0)
    else:
        glfw.make_context_current(_glfw_window)
        glfw.set_window_title(_glfw_window, title)
        glfw.set_window_size(_glfw_window, width, height)

    # Draw the pixels (bottom-up, hence the vertical flip) and present.
    glfw.poll_events()
    gl.glClearColor(0, 0, 0, 1)
    gl.glClear(gl.GL_COLOR_BUFFER_BIT)
    gl.glWindowPos2f(0, 0)
    gl.glPixelStorei(gl.GL_UNPACK_ALIGNMENT, 1)
    gl_format = {3: gl.GL_RGB, 2: gl.GL_RG, 1: gl.GL_LUMINANCE}[channels]
    gl_dtype = {'uint8': gl.GL_UNSIGNED_BYTE, 'float32': gl.GL_FLOAT}[image.dtype.name]
    gl.glDrawPixels(width, height, gl_format, gl_dtype, image[::-1])
    glfw.swap_buffers(_glfw_window)
    return not glfw.window_should_close(_glfw_window)

#----------------------------------------------------------------------------
# Image save helper.
#----------------------------------------------------------------------------

def save_image(fn, x):
    """Quantize float image x (values in [0, 1]) to uint8 and write it to fn."""
    import imageio
    quantized = np.clip(np.rint(x * 255.0), 0, 255).astype(np.uint8)
    imageio.imsave(fn, quantized)

#----------------------------------------------------------------------------

# TensorFlow utilities

#----------------------------------------------------------------------------

def _sanitize_tf_config(config_dict: dict = None) -> dict:
    """Build the session config dictionary: defaults merged with user overrides."""
    # Defaults.
    cfg = dict()
    cfg["rnd.np_random_seed"]               = None    # Random seed for NumPy. None = keep as is.
    cfg["rnd.tf_random_seed"]               = "auto"  # Random seed for TensorFlow. 'auto' = derive from NumPy random state. None = keep as is.
    cfg["env.TF_CPP_MIN_LOG_LEVEL"]         = "1"     # 0 = Print all available debug info from TensorFlow. 1 = Print warnings and errors, but disable debug info.
    cfg["env.HDF5_USE_FILE_LOCKING"]        = "FALSE" # Disable HDF5 file locking to avoid concurrency issues with network shares.
    cfg["graph_options.place_pruned_graph"] = True    # False = Check that all ops are available on the designated device. True = Skip the check for ops that are not used.
    cfg["gpu_options.allow_growth"]         = True    # False = Allocate all GPU memory at the beginning. True = Allocate only as much GPU memory as needed.

    # Environment variables the user has already set win over our defaults.
    for key in list(cfg):
        fields = key.split(".")
        if fields[0] == "env":
            assert len(fields) == 2
            if fields[1] in os.environ:
                del cfg[key]

    # User overrides.
    if config_dict is not None:
        cfg.update(config_dict)
    return cfg


def init_tf(config_dict: dict = None) -> None:
    """Initialize TensorFlow session using good default settings."""
    # Skip if already initialized.
    if tf.get_default_session() is not None:
        return

    # Setup config dict and random seeds.
    cfg = _sanitize_tf_config(config_dict)
    np_random_seed = cfg["rnd.np_random_seed"]
    if np_random_seed is not None:
        np.random.seed(np_random_seed)
    tf_random_seed = cfg["rnd.tf_random_seed"]
    if tf_random_seed == "auto":
        tf_random_seed = np.random.randint(1 << 31)
    if tf_random_seed is not None:
        tf.set_random_seed(tf_random_seed)

    # Export all "env.*" config keys as environment variables.
    for key, value in cfg.items():
        fields = key.split(".")
        if fields[0] == "env":
            assert len(fields) == 2
            os.environ[fields[1]] = str(value)

    # Create default TensorFlow session.
    create_session(cfg, force_as_default=True)


def assert_tf_initialized():
    """Check that TensorFlow session has been initialized."""
    if tf.get_default_session() is None:
        raise RuntimeError("No default TensorFlow session found. Please call util.init_tf().")


def create_session(config_dict: dict = None, force_as_default: bool = False) -> tf.Session:
    """Create tf.Session based on config dict."""
    # Translate flat "a.b" keys into nested ConfigProto fields; the "rnd.*"
    # and "env.*" entries are consumed elsewhere and skipped here.
    cfg = _sanitize_tf_config(config_dict)
    config_proto = tf.ConfigProto()
    for key, value in cfg.items():
        fields = key.split(".")
        if fields[0] not in ["rnd", "env"]:
            obj = config_proto
            for field in fields[:-1]:
                obj = getattr(obj, field)
            setattr(obj, fields[-1], value)

    # Create session.
    session = tf.Session(config=config_proto)
    if force_as_default:
        # Permanently enter the session so it becomes the process-wide default.
        # pylint: disable=protected-access
        session._default_session = session.as_default()
        session._default_session.enforce_nesting = False
        session._default_session.__enter__()
    return session


def is_tf_expression(x: Any) -> bool:
    """Check whether the input is a valid Tensorflow expression, i.e., Tensorflow Tensor, Variable, or Operation."""
    return isinstance(x, (tf.Tensor, tf.Variable, tf.Operation))


def absolute_name_scope(scope: str) -> tf.name_scope:
    """Forcefully enter the specified name scope, ignoring any surrounding scopes."""
    return tf.name_scope(scope + "/")


def init_uninitialized_vars(target_vars: List[tf.Variable] = None) -> None:
    """Initialize all tf.Variables that have not already been initialized.

    Equivalent to the following, but more efficient and does not bloat the tf graph:
    tf.variables_initializer(tf.report_uninitialized_variables()).run()
    """
    assert_tf_initialized()
    if target_vars is None:
        target_vars = tf.global_variables()

    test_vars = []
    test_ops = []

    with tf.control_dependencies(None): # ignore surrounding control_dependencies
        for var in target_vars:
            assert is_tf_expression(var)

            try:
                tf.get_default_graph().get_tensor_by_name(var.name.replace(":0", "/IsVariableInitialized:0"))
            except KeyError:
                # Op does not exist => variable may be uninitialized.
                test_vars.append(var)

                with absolute_name_scope(var.name.split(":")[0]):
                    test_ops.append(tf.is_variable_initialized(var))

    # Initialize only the variables the graph reports as uninitialized.
    init_vars = [var for var, inited in zip(test_vars, run(test_ops)) if not inited]
    run([var.initializer for var in init_vars])


def run(*args, **kwargs) -> Any:
    """Run the specified ops in the default session."""
    assert_tf_initialized()
    return tf.get_default_session().run(*args, **kwargs)

# ---------------------------------------------------------------------------
# /samples/torch/cube.py
# ---------------------------------------------------------------------------

# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.

import argparse
import os
import pathlib
import sys
import numpy as np
import torch
import imageio

import util

import nvdiffrast.torch as dr

# Transform vertex positions to clip space
def transform_pos(mtx, pos):
    # Promote a NumPy matrix to a CUDA tensor; pass torch tensors through.
    t_mtx = torch.from_numpy(mtx).cuda() if isinstance(mtx, np.ndarray) else mtx
    # Homogenize: (x,y,z) -> (x,y,z,1)
    ones = torch.ones([pos.shape[0], 1]).cuda()
    posw = torch.cat([pos, ones], axis=1)
    return torch.matmul(posw, t_mtx.t())[None, ...]

def render(glctx, mtx, pos, pos_idx, vtx_col, col_idx, resolution: int):
    """Rasterize the mesh with matrix mtx and return an antialiased image of
    interpolated vertex colors, shape [1, resolution, resolution, C]."""
    pos_clip = transform_pos(mtx, pos)
    rast_out, _ = dr.rasterize(glctx, pos_clip, pos_idx, resolution=[resolution, resolution])
    color, _ = dr.interpolate(vtx_col[None, ...], rast_out, col_idx)
    color = dr.antialias(color, rast_out, pos_clip, pos_idx)
    return color

def make_grid(arr, ncols=2):
    """Tile a batch of images [n, H, W, C] into one [H*nrows, W*ncols, C] image.

    n must be divisible by ncols; images are laid out in row-major order.
    """
    n, height, width, nc = arr.shape
    nrows = n//ncols
    assert n == nrows*ncols
    return arr.reshape(nrows, ncols, height, width, nc).swapaxes(1,2).reshape(height*nrows, width*ncols, nc)

def fit_cube(max_iter          = 5000,
             resolution        = 4,
             discontinuous     = False,
             repeats           = 1,
             log_interval      = 10,
             display_interval  = None,
             display_res       = 512,
             out_dir           = None,
             log_fn            = None,
             mp4save_interval  = None,
             mp4save_fn        = None,
             use_opengl        = False):
    """Optimize a cube's vertex positions and colors to match a reference.

    When out_dir is given, writes a text log (log_fn) and an mp4 progress
    video (mp4save_fn, every mp4save_interval iterations) under it.
    """
    log_file = None
    writer = None
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
        if log_fn:
            log_file = open(f'{out_dir}/{log_fn}', 'wt')
        # Fix: require a truthy interval before opening the video writer. The
        # previous test `mp4save_interval != 0` was also true for the default
        # None, which opened a bogus output file literally named 'None'.
        if mp4save_interval:
            writer = imageio.get_writer(f'{out_dir}/{mp4save_fn}', mode='I', fps=30, codec='libx264', bitrate='16M')
        else:
            mp4save_interval = None

    # Load cube mesh: continuous or discontinuous vertex colors.
    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    fn = 'cube_%s.npz' % ('d' if discontinuous else 'c')
    with np.load(f'{datadir}/{fn}') as f:
        pos_idx, vtxp, col_idx, vtxc = f.values()
    print("Mesh has %d triangles and %d vertices." % (pos_idx.shape[0], vtxp.shape[0]))

    # Create position/triangle index tensors
    pos_idx = torch.from_numpy(pos_idx.astype(np.int32)).cuda()
    col_idx = torch.from_numpy(col_idx.astype(np.int32)).cuda()
    vtx_pos = torch.from_numpy(vtxp.astype(np.float32)).cuda()
    vtx_col = torch.from_numpy(vtxc.astype(np.float32)).cuda()

    # Rasterizer context
    glctx = dr.RasterizeGLContext() if use_opengl else dr.RasterizeCudaContext()

    # Repeats.
    for rep in range(repeats):

        ang = 0.0
        gl_avg = []

        # Start from randomly perturbed geometry and random colors.
        vtx_pos_rand = np.random.uniform(-0.5, 0.5, size=vtxp.shape) + vtxp
        vtx_col_rand = np.random.uniform(0.0, 1.0, size=vtxc.shape)
        vtx_pos_opt  = torch.tensor(vtx_pos_rand, dtype=torch.float32, device='cuda', requires_grad=True)
        vtx_col_opt  = torch.tensor(vtx_col_rand, dtype=torch.float32, device='cuda', requires_grad=True)

        # Adam optimizer for vertex position and color with a learning rate ramp.
        optimizer = torch.optim.Adam([vtx_pos_opt, vtx_col_opt], lr=1e-2)
        scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda x: max(0.01, 10**(-x*0.0005)))

        for it in range(max_iter + 1):
            # Random rotation/translation matrix for optimization.
            r_rot = util.random_rotation_translation(0.25)

            # Smooth rotation for display.
            a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))

            # Modelview and modelview + projection matrices.
            proj  = util.projection(x=0.4)
            r_mv  = np.matmul(util.translate(0, 0, -3.5), r_rot)
            r_mvp = np.matmul(proj, r_mv).astype(np.float32)
            a_mv  = np.matmul(util.translate(0, 0, -3.5), a_rot)
            a_mvp = np.matmul(proj, a_mv).astype(np.float32)

        # Compute geometric error for logging.
            with torch.no_grad():
                geom_loss = torch.mean(torch.sum((torch.abs(vtx_pos_opt) - .5)**2, dim=1)**0.5)
                gl_avg.append(float(geom_loss))

            # Print/save log.
            if log_interval and (it % log_interval == 0):
                gl_val = np.mean(np.asarray(gl_avg))
                gl_avg = []
                s = ("rep=%d," % rep) if repeats > 1 else ""
                s += "iter=%d,err=%f" % (it, gl_val)
                print(s)
                if log_file:
                    log_file.write(s + "\n")

            # Render reference and optimized frames under the same random view.
            color     = render(glctx, r_mvp, vtx_pos, pos_idx, vtx_col, col_idx, resolution)
            color_opt = render(glctx, r_mvp, vtx_pos_opt, pos_idx, vtx_col_opt, col_idx, resolution)

            # Compute loss and train.
            loss = torch.mean((color - color_opt)**2) # L2 pixel loss.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()

            # Show/save image.
            display_image = display_interval and (it % display_interval == 0)
            save_mp4 = mp4save_interval and (it % mp4save_interval == 0)

            if display_image or save_mp4:
                ang = ang + 0.01

                img_b = color[0].cpu().numpy()[::-1]
                img_o = color_opt[0].detach().cpu().numpy()[::-1]
                img_d = render(glctx, a_mvp, vtx_pos_opt, pos_idx, vtx_col_opt, col_idx, display_res)[0]
                img_r = render(glctx, a_mvp, vtx_pos, pos_idx, vtx_col, col_idx, display_res)[0]

                # Upscale the low-res training images to display resolution.
                scl = display_res // img_o.shape[0]
                img_b = np.repeat(np.repeat(img_b, scl, axis=0), scl, axis=1)
                img_o = np.repeat(np.repeat(img_o, scl, axis=0), scl, axis=1)
                result_image = make_grid(np.stack([img_o, img_b, img_d.detach().cpu().numpy()[::-1], img_r.cpu().numpy()[::-1]]))

                if display_image:
                    util.display_image(result_image, size=display_res, title='%d / %d' % (it, max_iter))
                if save_mp4:
                    writer.append_data(np.clip(np.rint(result_image*255.0), 0, 255).astype(np.uint8))

    # Done.
    if writer is not None:
        writer.close()
    if log_file:
        log_file.close()

#----------------------------------------------------------------------------

def main():
    parser = argparse.ArgumentParser(description='Cube fit example')
    parser.add_argument('--opengl', help='enable OpenGL rendering', action='store_true', default=False)
    parser.add_argument('--outdir', help='specify output directory', default='')
    parser.add_argument('--discontinuous', action='store_true', default=False)
    parser.add_argument('--resolution', type=int, default=0, required=True)
    parser.add_argument('--display-interval', type=int, default=0)
    parser.add_argument('--mp4save-interval', type=int, default=100)
    parser.add_argument('--max-iter', type=int, default=1000)
    args = parser.parse_args()

    # Set up logging.
    if args.outdir:
        ds = 'd' if args.discontinuous else 'c'
        out_dir = f'{args.outdir}/cube_{ds}_{args.resolution}'
        print (f'Saving results under {out_dir}')
    else:
        out_dir = None
        print ('No output directory specified, not saving log or images')

    # Run.
    fit_cube(
        max_iter=args.max_iter,
        resolution=args.resolution,
        discontinuous=args.discontinuous,
        log_interval=10,
        display_interval=args.display_interval,
        out_dir=out_dir,
        log_fn='log.txt',
        mp4save_interval=args.mp4save_interval,
        mp4save_fn='progress.mp4',
        use_opengl=args.opengl
    )

    # Done.
199 | print("Done.") 200 | 201 | #---------------------------------------------------------------------------- 202 | 203 | if __name__ == "__main__": 204 | main() 205 | 206 | #---------------------------------------------------------------------------- 207 | -------------------------------------------------------------------------------- /samples/torch/earth.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import argparse 10 | import os 11 | import pathlib 12 | import sys 13 | import numpy as np 14 | import torch 15 | 16 | import util 17 | 18 | import nvdiffrast.torch as dr 19 | 20 | #---------------------------------------------------------------------------- 21 | # Helpers. 22 | 23 | def transform_pos(mtx, pos): 24 | t_mtx = torch.from_numpy(mtx).cuda() if isinstance(mtx, np.ndarray) else mtx 25 | posw = torch.cat([pos, torch.ones([pos.shape[0], 1]).cuda()], axis=1) 26 | return torch.matmul(posw, t_mtx.t())[None, ...] 
27 | 28 | def render(glctx, mtx, pos, pos_idx, uv, uv_idx, tex, resolution, enable_mip, max_mip_level): 29 | pos_clip = transform_pos(mtx, pos) 30 | rast_out, rast_out_db = dr.rasterize(glctx, pos_clip, pos_idx, resolution=[resolution, resolution]) 31 | 32 | if enable_mip: 33 | texc, texd = dr.interpolate(uv[None, ...], rast_out, uv_idx, rast_db=rast_out_db, diff_attrs='all') 34 | color = dr.texture(tex[None, ...], texc, texd, filter_mode='linear-mipmap-linear', max_mip_level=max_mip_level) 35 | else: 36 | texc, _ = dr.interpolate(uv[None, ...], rast_out, uv_idx) 37 | color = dr.texture(tex[None, ...], texc, filter_mode='linear') 38 | 39 | color = color * torch.clamp(rast_out[..., -1:], 0, 1) # Mask out background. 40 | return color 41 | 42 | #---------------------------------------------------------------------------- 43 | 44 | def fit_earth(max_iter = 20000, 45 | log_interval = 10, 46 | display_interval = None, 47 | display_res = 1024, 48 | enable_mip = True, 49 | res = 512, 50 | ref_res = 2048, # Dropped from 4096 to 2048 to allow using the Cuda rasterizer. 51 | lr_base = 1e-2, 52 | lr_ramp = 0.1, 53 | out_dir = None, 54 | log_fn = None, 55 | texsave_interval = None, 56 | texsave_fn = None, 57 | imgsave_interval = None, 58 | imgsave_fn = None, 59 | use_opengl = False): 60 | 61 | log_file = None 62 | if out_dir: 63 | os.makedirs(out_dir, exist_ok=True) 64 | if log_fn: 65 | log_file = open(out_dir + '/' + log_fn, 'wt') 66 | else: 67 | imgsave_interval, texsave_interval = None, None 68 | 69 | # Mesh and texture adapted from "3D Earth Photorealistic 2K" model at 70 | # https://www.turbosquid.com/3d-models/3d-realistic-earth-photorealistic-2k-1279125 71 | datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data' 72 | with np.load(f'{datadir}/earth.npz') as f: 73 | pos_idx, pos, uv_idx, uv, tex = f.values() 74 | tex = tex.astype(np.float32)/255.0 75 | max_mip_level = 9 # Texture is a 4x3 atlas of 512x512 maps. 76 | print("Mesh has %d triangles and %d vertices." 
% (pos_idx.shape[0], pos.shape[0])) 77 | 78 | # Some input geometry contains vertex positions in (N, 4) (with v[:,3]==1). Drop 79 | # the last column in that case. 80 | if pos.shape[1] == 4: pos = pos[:, 0:3] 81 | 82 | # Create position/triangle index tensors 83 | pos_idx = torch.from_numpy(pos_idx.astype(np.int32)).cuda() 84 | vtx_pos = torch.from_numpy(pos.astype(np.float32)).cuda() 85 | uv_idx = torch.from_numpy(uv_idx.astype(np.int32)).cuda() 86 | vtx_uv = torch.from_numpy(uv.astype(np.float32)).cuda() 87 | 88 | tex = torch.from_numpy(tex.astype(np.float32)).cuda() 89 | tex_opt = torch.full(tex.shape, 0.2, device='cuda', requires_grad=True) 90 | glctx = dr.RasterizeGLContext() if use_opengl else dr.RasterizeCudaContext() 91 | 92 | ang = 0.0 93 | 94 | # Adam optimizer for texture with a learning rate ramp. 95 | optimizer = torch.optim.Adam([tex_opt], lr=lr_base) 96 | scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda x: lr_ramp**(float(x)/float(max_iter))) 97 | 98 | # Render. 99 | ang = 0.0 100 | texloss_avg = [] 101 | for it in range(max_iter + 1): 102 | # Random rotation/translation matrix for optimization. 103 | r_rot = util.random_rotation_translation(0.25) 104 | 105 | # Smooth rotation for display. 106 | a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang)) 107 | dist = np.random.uniform(0.0, 48.5) 108 | 109 | # Modelview and modelview + projection matrices. 
        # Build projection and modelview/MVP matrices for this iteration:
        # "r_" = randomly-posed training camera, "a_" = smoothly animated display camera.
        proj  = util.projection(x=0.4, n=1.0, f=200.0)
        r_mv  = np.matmul(util.translate(0, 0, -1.5-dist), r_rot)
        r_mvp = np.matmul(proj, r_mv).astype(np.float32)
        a_mv  = np.matmul(util.translate(0, 0, -3.5), a_rot)
        a_mvp = np.matmul(proj, a_mv).astype(np.float32)

        # Measure texture-space RMSE loss
        with torch.no_grad():
            # Mask selects a cross-shaped central region of the texture;
            # NOTE(review): presumably this is where the visible texel content
            # lies for this texture layout — confirm against the earth texture.
            texmask = torch.zeros_like(tex)
            tr = tex.shape[1]//4
            texmask[tr+13:2*tr-13, 25:-25, :] += 1.0
            texmask[25:-25, tr+13:2*tr-13, :] += 1.0
            # Measure only relevant portions of texture when calculating texture
            # PSNR.
            texloss = (torch.sum(texmask * (tex - tex_opt)**2)/torch.sum(texmask))**0.5 # RMSE within masked area.
            texloss_avg.append(float(texloss))

        # Render reference and optimized frames. Always enable mipmapping for reference.
        color     = render(glctx, r_mvp, vtx_pos, pos_idx, vtx_uv, uv_idx, tex, ref_res, True, max_mip_level)
        color_opt = render(glctx, r_mvp, vtx_pos, pos_idx, vtx_uv, uv_idx, tex_opt, res, enable_mip, max_mip_level)

        # Reduce the reference to correct size.
        while color.shape[1] > res:
            color = util.bilinear_downsample(color)

        # Compute loss and perform a training step.
        loss = torch.mean((color - color_opt)**2) # L2 pixel loss.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        # Print/save log.
        if log_interval and (it % log_interval == 0):
            texloss_val = np.mean(np.asarray(texloss_avg))
            texloss_avg = []
            psnr = -10.0 * np.log10(texloss_val**2) # PSNR based on average RMSE.
            s = "iter=%d,loss=%f,psnr=%f" % (it, texloss_val, psnr)
            print(s)
            if log_file:
                log_file.write(s + '\n')

        # Show/save image.
        display_image = display_interval and (it % display_interval == 0)
        save_image = imgsave_interval and (it % imgsave_interval == 0)
        save_texture = texsave_interval and (it % texsave_interval) == 0

        if display_image or save_image:
            # Advance the display-camera angle only when a frame is produced.
            ang = ang + 0.1

            with torch.no_grad():
                result_image = render(glctx, a_mvp, vtx_pos, pos_idx, vtx_uv, uv_idx, tex_opt, res, enable_mip, max_mip_level)[0].cpu().numpy()[::-1]

            if display_image:
                util.display_image(result_image, size=display_res, title='%d / %d' % (it, max_iter))
            if save_image:
                util.save_image(out_dir + '/' + (imgsave_fn % it), result_image)

        if save_texture:
            # Texture rows are flipped ([::-1]) to match image orientation on disk.
            texture = tex_opt.cpu().numpy()[::-1]
            util.save_image(out_dir + '/' + (texsave_fn % it), texture)

    # Done.
    if log_file:
        log_file.close()

#----------------------------------------------------------------------------

def main():
    # Command-line entry point for the earth texture fitting example.
    parser = argparse.ArgumentParser(description='Earth texture fitting example')
    parser.add_argument('--opengl', help='enable OpenGL rendering', action='store_true', default=False)
    parser.add_argument('--outdir', help='specify output directory', default='')
    parser.add_argument('--mip', help='enable mipmapping', action='store_true', default=False)
    parser.add_argument('--display-interval', type=int, default=0)
    parser.add_argument('--max-iter', type=int, default=10000)
    args = parser.parse_args()

    # Set up logging. Output directory name encodes whether mipmapping was used.
    if args.outdir:
        ms = 'mip' if args.mip else 'nomip'
        out_dir = f'{args.outdir}/earth_{ms}'
        print (f'Saving results under {out_dir}')
    else:
        out_dir = None
        print ('No output directory specified, not saving log or images')

    # Run.
    fit_earth(max_iter=args.max_iter, log_interval=10, display_interval=args.display_interval, enable_mip=args.mip, out_dir=out_dir, log_fn='log.txt', texsave_interval=1000, texsave_fn='tex_%06d.png', imgsave_interval=1000, imgsave_fn='img_%06d.png', use_opengl=args.opengl)

    # Done.
    print("Done.")

#----------------------------------------------------------------------------

if __name__ == "__main__":
    main()

#----------------------------------------------------------------------------

# /samples/torch/envphong.py

# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.

import argparse
import numpy as np
import torch
import os
import sys
import pathlib
import imageio

import util

import nvdiffrast.torch as dr

#----------------------------------------------------------------------------
# Environment map and Phong BRDF learning.
#----------------------------------------------------------------------------

def fit_env_phong(max_iter          = 1000,
                  log_interval      = 10,
                  display_interval  = None,
                  display_res       = 1024,
                  res               = 1024,
                  lr_base           = 1e-2,
                  lr_ramp           = 1.0,
                  out_dir           = None,
                  log_fn            = None,
                  mp4save_interval  = None,
                  mp4save_fn        = None,
                  use_opengl        = False):
    """Jointly learn a cube-mapped environment texture and Phong BRDF parameters.

    A reference image is rendered each iteration with the known environment map
    and Phong parameters; a candidate image is rendered with the learned ones,
    and an L2 pixel loss drives Adam updates of the learned variables.

    Args:
        max_iter:         Number of training iterations (runs max_iter + 1 steps).
        log_interval:     Iterations between log prints; falsy disables logging.
        display_interval: Iterations between interactive displays; falsy disables.
        display_res:      Window size used for interactive display.
        res:              Rasterization resolution (res x res).
        lr_base:          Initial Adam learning rate.
        lr_ramp:          Total multiplicative LR decay over max_iter iterations.
        out_dir:          Output directory for log/video; None disables saving.
        log_fn:           Log file name inside out_dir, or None.
        mp4save_interval: Iterations between saved video frames; 0/None disables.
        mp4save_fn:       Output video file name inside out_dir.
        use_opengl:       Use the OpenGL rasterizer instead of the CUDA one.
    """
    log_file = None
    writer = None
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
        if log_fn:
            log_file = open(out_dir + '/' + log_fn, 'wt')
        if mp4save_interval != 0:
            writer = imageio.get_writer(f'{out_dir}/{mp4save_fn}', mode='I', fps=30, codec='libx264', bitrate='16M')
    else:
        mp4save_interval = None

    # Texture adapted from https://github.com/WaveEngine/Samples/tree/master/Materials/EnvironmentMap/Content/Assets/CubeMap.cubemap
    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    with np.load(f'{datadir}/envphong.npz') as f:
        pos_idx, pos, normals, env = f.values()
    # Convert 8-bit environment map to [0, 1] floats and flip the second axis.
    env = env.astype(np.float32)/255.0
    env = np.stack(env)[:, ::-1].copy()
    print("Mesh has %d triangles and %d vertices." % (pos_idx.shape[0], pos.shape[0]))

    # Move all the stuff to GPU.
    pos_idx = torch.as_tensor(pos_idx, dtype=torch.int32, device='cuda')
    pos = torch.as_tensor(pos, dtype=torch.float32, device='cuda')
    normals = torch.as_tensor(normals, dtype=torch.float32, device='cuda')
    env = torch.as_tensor(env, dtype=torch.float32, device='cuda')

    # Target Phong parameters.
    phong_rgb = np.asarray([1.0, 0.8, 0.6], np.float32)
    phong_exp = 25.0
    phong_rgb_t = torch.as_tensor(phong_rgb, dtype=torch.float32, device='cuda')

    # Learned variables: environment maps, phong color, phong exponent.
    # phong_var_raw holds [r, g, b, exponent/10]; phong_var_mul rescales the
    # exponent channel so all four channels train at a similar magnitude.
    env_var = torch.ones_like(env) * .5
    env_var.requires_grad_()
    phong_var_raw = torch.as_tensor(np.random.uniform(size=[4]), dtype=torch.float32, device='cuda')
    phong_var_raw.requires_grad_()
    phong_var_mul = torch.as_tensor([1.0, 1.0, 1.0, 10.0], dtype=torch.float32, device='cuda')

    # Render.
    ang = 0.0
    imgloss_avg, phong_avg = [], []
    glctx = dr.RasterizeGLContext() if use_opengl else dr.RasterizeCudaContext()
    zero_tensor = torch.as_tensor(0.0, dtype=torch.float32, device='cuda')
    one_tensor = torch.as_tensor(1.0, dtype=torch.float32, device='cuda')

    # Adam optimizer for environment map and phong with a learning rate ramp.
    optimizer = torch.optim.Adam([env_var, phong_var_raw], lr=lr_base)
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda x: lr_ramp**(float(x)/float(max_iter)))

    for it in range(max_iter + 1):
        phong_var = phong_var_raw * phong_var_mul

        # Random rotation/translation matrix for optimization.
        r_rot = util.random_rotation_translation(0.25)

        # Smooth rotation for display.
        ang = ang + 0.01
        a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))

        # Modelview and modelview + projection matrices.
        proj  = util.projection(x=0.4, n=1.0, f=200.0)
        r_mv  = np.matmul(util.translate(0, 0, -3.5), r_rot)
        r_mvp = np.matmul(proj, r_mv).astype(np.float32)
        a_mv  = np.matmul(util.translate(0, 0, -3.5), a_rot)
        a_mvp = np.matmul(proj, a_mv).astype(np.float32)
        a_mvc = a_mvp  # Keep a CPU-side copy for transforming the display light.
        r_mvp = torch.as_tensor(r_mvp, dtype=torch.float32, device='cuda')
        a_mvp = torch.as_tensor(a_mvp, dtype=torch.float32, device='cuda')

        # Solve camera positions (translation column of the inverse modelview).
        a_campos = torch.as_tensor(np.linalg.inv(a_mv)[:3, 3], dtype=torch.float32, device='cuda')
        r_campos = torch.as_tensor(np.linalg.inv(r_mv)[:3, 3], dtype=torch.float32, device='cuda')

        # Random light direction.
        lightdir = np.random.normal(size=[3])
        lightdir /= np.linalg.norm(lightdir) + 1e-8
        lightdir = torch.as_tensor(lightdir, dtype=torch.float32, device='cuda')

        def render_refl(ldir, cpos, mvp):
            # Render per-pixel reflection vectors for the given light, camera
            # position and MVP; returns (refl, refl derivatives, L.R, bg mask).
            # Transform and rasterize.
            viewvec = pos[..., :3] - cpos[np.newaxis, np.newaxis, :] # View vectors at vertices.
            reflvec = viewvec - 2.0 * normals[np.newaxis, ...] * torch.sum(normals[np.newaxis, ...] * viewvec, -1, keepdim=True) # Reflection vectors at vertices.
            reflvec = reflvec / torch.sum(reflvec**2, -1, keepdim=True)**0.5 # Normalize.
            pos_clip = torch.matmul(pos, mvp.t())[np.newaxis, ...]
            rast_out, rast_out_db = dr.rasterize(glctx, pos_clip, pos_idx, [res, res])
            refl, refld = dr.interpolate(reflvec, rast_out, pos_idx, rast_db=rast_out_db, diff_attrs='all') # Interpolated reflection vectors.

            # Phong light.
            refl = refl / (torch.sum(refl**2, -1, keepdim=True) + 1e-8)**0.5  # Normalize.
            ldotr = torch.sum(-ldir * refl, -1, keepdim=True) # L dot R.

            # Return
            return refl, refld, ldotr, (rast_out[..., -1:] == 0)

        # Render the reflections.
        refl, refld, ldotr, mask = render_refl(lightdir, r_campos, r_mvp)

        # Reference color. No need for AA because we are not learning geometry.
        color = dr.texture(env[np.newaxis, ...], refl, uv_da=refld, filter_mode='linear-mipmap-linear', boundary_mode='cube')
        color = color + phong_rgb_t * torch.max(zero_tensor, ldotr) ** phong_exp # Phong.
        color = torch.where(mask, one_tensor, color) # White background.

        # Candidate rendering same up to this point, but uses learned texture and Phong parameters instead.
        color_opt = dr.texture(env_var[np.newaxis, ...], refl, uv_da=refld, filter_mode='linear-mipmap-linear', boundary_mode='cube')
        color_opt = color_opt + phong_var[:3] * torch.max(zero_tensor, ldotr) ** phong_var[3] # Phong.
        color_opt = torch.where(mask, one_tensor, color_opt) # White background.

        # Compute loss and train.
        loss = torch.mean((color - color_opt)**2) # L2 pixel loss.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        # Collect losses.
        imgloss_avg.append(loss.detach().cpu().numpy())
        phong_avg.append(phong_var.detach().cpu().numpy())

        # Print/save log.
        if log_interval and (it % log_interval == 0):
            imgloss_val, imgloss_avg = np.mean(np.asarray(imgloss_avg, np.float32)), []
            phong_val, phong_avg = np.mean(np.asarray(phong_avg, np.float32), axis=0), []
            phong_rgb_rmse = np.mean((phong_val[:3] - phong_rgb)**2)**0.5
            phong_exp_rel_err = np.abs(phong_val[3] - phong_exp)/phong_exp
            s = "iter=%d,phong_rgb_rmse=%f,phong_exp_rel_err=%f,img_rmse=%f" % (it, phong_rgb_rmse, phong_exp_rel_err, imgloss_val)
            print(s)
            if log_file:
                log_file.write(s + '\n')

        # Show/save result image.
        display_image = display_interval and (it % display_interval == 0)
        save_mp4 = mp4save_interval and (it % mp4save_interval == 0)

        if display_image or save_mp4:
            # Fixed light for display, transformed into view space via a_mvc.
            lightdir = np.asarray([.8, -1., .5, 0.0])
            lightdir = np.matmul(a_mvc, lightdir)[:3]
            lightdir /= np.linalg.norm(lightdir)
            lightdir = torch.as_tensor(lightdir, dtype=torch.float32, device='cuda')
            refl, refld, ldotr, mask = render_refl(lightdir, a_campos, a_mvp)
            color_opt = dr.texture(env_var[np.newaxis, ...], refl, uv_da=refld, filter_mode='linear-mipmap-linear', boundary_mode='cube')
            color_opt = color_opt + phong_var[:3] * torch.max(zero_tensor, ldotr) ** phong_var[3]
            color_opt = torch.where(mask, one_tensor, color_opt)
            result_image = color_opt.detach()[0].cpu().numpy()[::-1]
            if display_image:
                util.display_image(result_image, size=display_res, title='%d / %d' % (it, max_iter))
            if save_mp4:
                writer.append_data(np.clip(np.rint(result_image*255.0), 0, 255).astype(np.uint8))

    # Done.
    if writer is not None:
        writer.close()
    if log_file:
        log_file.close()

#----------------------------------------------------------------------------
# Main function.
#----------------------------------------------------------------------------

def main():
    # Command-line entry point for the environment map fitting example.
    parser = argparse.ArgumentParser(description='Environment map fitting example')
    parser.add_argument('--opengl', help='enable OpenGL rendering', action='store_true', default=False)
    parser.add_argument('--outdir', help='specify output directory', default='')
    parser.add_argument('--display-interval', type=int, default=0)
    parser.add_argument('--mp4save-interval', type=int, default=10)
    parser.add_argument('--max-iter', type=int, default=5000)
    args = parser.parse_args()

    # Set up logging.
    if args.outdir:
        out_dir = f'{args.outdir}/env_phong'
        print (f'Saving results under {out_dir}')
    else:
        out_dir = None
        print ('No output directory specified, not saving log or images')

    # Run.
    fit_env_phong(
        max_iter=args.max_iter,
        log_interval=100,
        display_interval=args.display_interval,
        out_dir=out_dir,
        mp4save_interval=args.mp4save_interval,
        mp4save_fn='progress.mp4',
        use_opengl=args.opengl
    )

    # Done.
    print("Done.")

#----------------------------------------------------------------------------

if __name__ == "__main__":
    main()

#----------------------------------------------------------------------------

# /samples/torch/triangle.py

# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
8 | 9 | import imageio 10 | import numpy as np 11 | import torch 12 | import nvdiffrast.torch as dr 13 | import sys 14 | 15 | def tensor(*args, **kwargs): 16 | return torch.tensor(*args, device='cuda', **kwargs) 17 | 18 | if sys.argv[1:] == ['--cuda']: 19 | glctx = dr.RasterizeCudaContext() 20 | elif sys.argv[1:] == ['--opengl']: 21 | glctx = dr.RasterizeGLContext() 22 | else: 23 | print("Specify either --cuda or --opengl") 24 | exit(1) 25 | 26 | pos = tensor([[[-0.8, -0.8, 0, 1], [0.8, -0.8, 0, 1], [-0.8, 0.8, 0, 1]]], dtype=torch.float32) 27 | col = tensor([[[1, 0, 0], [0, 1, 0], [0, 0, 1]]], dtype=torch.float32) 28 | tri = tensor([[0, 1, 2]], dtype=torch.int32) 29 | 30 | rast, _ = dr.rasterize(glctx, pos, tri, resolution=[256, 256]) 31 | out, _ = dr.interpolate(col, rast, tri) 32 | 33 | img = out.cpu().numpy()[0, ::-1, :, :] # Flip vertically. 34 | img = np.clip(np.rint(img * 255), 0, 255).astype(np.uint8) # Quantize to np.uint8 35 | 36 | print("Saving to 'tri.png'.") 37 | imageio.imsave('tri.png', img) 38 | -------------------------------------------------------------------------------- /samples/torch/util.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import numpy as np 10 | import torch 11 | 12 | #---------------------------------------------------------------------------- 13 | # Projection and transformation matrix helpers. 
14 | #---------------------------------------------------------------------------- 15 | 16 | def projection(x=0.1, n=1.0, f=50.0): 17 | return np.array([[n/x, 0, 0, 0], 18 | [ 0, n/x, 0, 0], 19 | [ 0, 0, -(f+n)/(f-n), -(2*f*n)/(f-n)], 20 | [ 0, 0, -1, 0]]).astype(np.float32) 21 | 22 | def translate(x, y, z): 23 | return np.array([[1, 0, 0, x], 24 | [0, 1, 0, y], 25 | [0, 0, 1, z], 26 | [0, 0, 0, 1]]).astype(np.float32) 27 | 28 | def rotate_x(a): 29 | s, c = np.sin(a), np.cos(a) 30 | return np.array([[1, 0, 0, 0], 31 | [0, c, s, 0], 32 | [0, -s, c, 0], 33 | [0, 0, 0, 1]]).astype(np.float32) 34 | 35 | def rotate_y(a): 36 | s, c = np.sin(a), np.cos(a) 37 | return np.array([[ c, 0, s, 0], 38 | [ 0, 1, 0, 0], 39 | [-s, 0, c, 0], 40 | [ 0, 0, 0, 1]]).astype(np.float32) 41 | 42 | def random_rotation_translation(t): 43 | m = np.random.normal(size=[3, 3]) 44 | m[1] = np.cross(m[0], m[2]) 45 | m[2] = np.cross(m[0], m[1]) 46 | m = m / np.linalg.norm(m, axis=1, keepdims=True) 47 | m = np.pad(m, [[0, 1], [0, 1]], mode='constant') 48 | m[3, 3] = 1.0 49 | m[:3, 3] = np.random.uniform(-t, t, size=[3]) 50 | return m 51 | 52 | #---------------------------------------------------------------------------- 53 | # Bilinear downsample by 2x. 54 | #---------------------------------------------------------------------------- 55 | 56 | def bilinear_downsample(x): 57 | w = torch.tensor([[1, 3, 3, 1], [3, 9, 9, 3], [3, 9, 9, 3], [1, 3, 3, 1]], dtype=torch.float32, device=x.device) / 64.0 58 | w = w.expand(x.shape[-1], 1, 4, 4) 59 | x = torch.nn.functional.conv2d(x.permute(0, 3, 1, 2), w, padding=1, stride=2, groups=x.shape[-1]) 60 | return x.permute(0, 2, 3, 1) 61 | 62 | #---------------------------------------------------------------------------- 63 | # Image display function using OpenGL. 
64 | #---------------------------------------------------------------------------- 65 | 66 | _glfw_window = None 67 | def display_image(image, zoom=None, size=None, title=None): # HWC 68 | # Import OpenGL and glfw. 69 | import OpenGL.GL as gl 70 | import glfw 71 | 72 | # Zoom image if requested. 73 | image = np.asarray(image) 74 | if size is not None: 75 | assert zoom is None 76 | zoom = max(1, size // image.shape[0]) 77 | if zoom is not None: 78 | image = image.repeat(zoom, axis=0).repeat(zoom, axis=1) 79 | height, width, channels = image.shape 80 | 81 | # Initialize window. 82 | if title is None: 83 | title = 'Debug window' 84 | global _glfw_window 85 | if _glfw_window is None: 86 | glfw.init() 87 | _glfw_window = glfw.create_window(width, height, title, None, None) 88 | glfw.make_context_current(_glfw_window) 89 | glfw.show_window(_glfw_window) 90 | glfw.swap_interval(0) 91 | else: 92 | glfw.make_context_current(_glfw_window) 93 | glfw.set_window_title(_glfw_window, title) 94 | glfw.set_window_size(_glfw_window, width, height) 95 | 96 | # Update window. 97 | glfw.poll_events() 98 | gl.glClearColor(0, 0, 0, 1) 99 | gl.glClear(gl.GL_COLOR_BUFFER_BIT) 100 | gl.glWindowPos2f(0, 0) 101 | gl.glPixelStorei(gl.GL_UNPACK_ALIGNMENT, 1) 102 | gl_format = {3: gl.GL_RGB, 2: gl.GL_RG, 1: gl.GL_LUMINANCE}[channels] 103 | gl_dtype = {'uint8': gl.GL_UNSIGNED_BYTE, 'float32': gl.GL_FLOAT}[image.dtype.name] 104 | gl.glDrawPixels(width, height, gl_format, gl_dtype, image[::-1]) 105 | glfw.swap_buffers(_glfw_window) 106 | if glfw.window_should_close(_glfw_window): 107 | return False 108 | return True 109 | 110 | #---------------------------------------------------------------------------- 111 | # Image save helper. 
#----------------------------------------------------------------------------

def save_image(fn, x):
    """Save a float image `x` (HWC, values in [0, 1]) to file `fn` as 8-bit."""
    import imageio
    x = np.rint(x * 255.0)
    x = np.clip(x, 0, 255).astype(np.uint8)
    imageio.imsave(fn, x)

#----------------------------------------------------------------------------

# /setup.py

# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.

import nvdiffrast
import setuptools
import os

with open("README.md", "r") as fh:
    long_description = fh.read()

setuptools.setup(
    name="nvdiffrast",
    version=nvdiffrast.__version__,
    author="Samuli Laine",
    author_email="slaine@nvidia.com",
    description="nvdiffrast - modular primitives for high-performance differentiable rendering",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/NVlabs/nvdiffrast",
    packages=setuptools.find_packages(),
    # Ship CUDA/C++ sources so the torch extension can be JIT-compiled at runtime.
    package_data={
        'nvdiffrast': [
            'common/*.h',
            'common/*.inl',
            'common/*.cu',
            'common/*.cpp',
            'common/cudaraster/*.hpp',
            'common/cudaraster/impl/*.cpp',
            'common/cudaraster/impl/*.hpp',
            'common/cudaraster/impl/*.inl',
            'common/cudaraster/impl/*.cu',
            'lib/*.h',
            'torch/*.h',
            'torch/*.inl',
            'torch/*.cpp',
            'tensorflow/*.cu',
        ] + (['lib/*.lib'] if os.name == 'nt' else [])  # setgpu.lib needed on Windows only.
    },
    include_package_data=True,
    install_requires=['numpy'], # note: can't require torch here as it will install torch even for a TensorFlow container
    classifiers=[
        "Programming Language :: Python :: 3",
        "Operating System :: OS Independent",
    ],
    python_requires='>=3.6',
)