├── LICENSE.txt
├── README.md
├── docker
│   ├── 10_nvidia.json
│   └── Dockerfile
├── docs
│   ├── img
│   │   ├── cube.png
│   │   ├── earth.png
│   │   ├── envphong.png
│   │   ├── logo.png
│   │   ├── pipe_cube.png
│   │   ├── pipe_earth.png
│   │   ├── pipe_envphong.png
│   │   ├── pose.png
│   │   ├── spot_aa.png
│   │   ├── spot_crop1.png
│   │   ├── spot_crop2.png
│   │   ├── spot_diff1.png
│   │   ├── spot_diff2.png
│   │   ├── spot_peel1.png
│   │   ├── spot_peel2.png
│   │   ├── spot_st.png
│   │   ├── spot_tex.png
│   │   ├── spot_texture.png
│   │   ├── spot_texw.png
│   │   ├── spot_tri.png
│   │   ├── spot_uv.png
│   │   ├── teaser.png
│   │   ├── teaser1.png
│   │   ├── teaser2.png
│   │   ├── teaser3.png
│   │   ├── teaser4.png
│   │   ├── teaser5.png
│   │   ├── thumb.jpg
│   │   └── tri.png
│   └── index.html
├── nvdiffrast
│   ├── __init__.py
│   ├── common
│   │   ├── antialias.cu
│   │   ├── antialias.h
│   │   ├── common.cpp
│   │   ├── common.h
│   │   ├── cudaraster
│   │   │   ├── CudaRaster.hpp
│   │   │   └── impl
│   │   │       ├── BinRaster.inl
│   │   │       ├── Buffer.cpp
│   │   │       ├── Buffer.hpp
│   │   │       ├── CoarseRaster.inl
│   │   │       ├── Constants.hpp
│   │   │       ├── CudaRaster.cpp
│   │   │       ├── Defs.hpp
│   │   │       ├── FineRaster.inl
│   │   │       ├── PrivateDefs.hpp
│   │   │       ├── RasterImpl.cpp
│   │   │       ├── RasterImpl.cu
│   │   │       ├── RasterImpl.hpp
│   │   │       ├── TriangleSetup.inl
│   │   │       └── Util.inl
│   │   ├── framework.h
│   │   ├── glutil.cpp
│   │   ├── glutil.h
│   │   ├── glutil_extlist.h
│   │   ├── interpolate.cu
│   │   ├── interpolate.h
│   │   ├── rasterize.cu
│   │   ├── rasterize.h
│   │   ├── rasterize_gl.cpp
│   │   ├── rasterize_gl.h
│   │   ├── texture.cpp
│   │   ├── texture.cu
│   │   └── texture.h
│   ├── lib
│   │   └── setgpu.lib
│   ├── tensorflow
│   │   ├── __init__.py
│   │   ├── ops.py
│   │   ├── plugin_loader.py
│   │   ├── tf_all.cu
│   │   ├── tf_antialias.cu
│   │   ├── tf_interpolate.cu
│   │   ├── tf_rasterize.cu
│   │   └── tf_texture.cu
│   └── torch
│       ├── __init__.py
│       ├── ops.py
│       ├── torch_antialias.cpp
│       ├── torch_bindings.cpp
│       ├── torch_bindings_gl.cpp
│       ├── torch_common.inl
│       ├── torch_interpolate.cpp
│       ├── torch_rasterize.cpp
│       ├── torch_rasterize_gl.cpp
│       ├── torch_texture.cpp
│       └── torch_types.h
├── run_sample.sh
├── samples
│   ├── data
│   │   ├── NOTICE.txt
│   │   ├── cube_c.npz
│   │   ├── cube_d.npz
│   │   ├── cube_p.npz
│   │   ├── earth.npz
│   │   └── envphong.npz
│   ├── tensorflow
│   │   ├── cube.py
│   │   ├── earth.py
│   │   ├── envphong.py
│   │   ├── pose.py
│   │   ├── triangle.py
│   │   └── util.py
│   └── torch
│       ├── cube.py
│       ├── earth.py
│       ├── envphong.py
│       ├── pose.py
│       ├── triangle.py
│       └── util.py
└── setup.py
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 2020, NVIDIA Corporation. All rights reserved.
2 |
3 |
4 | Nvidia Source Code License (1-Way Commercial)
5 |
6 | =======================================================================
7 |
8 | 1. Definitions
9 |
10 | "Licensor" means any person or entity that distributes its Work.
11 |
12 | "Software" means the original work of authorship made available under
13 | this License.
14 |
15 | "Work" means the Software and any additions to or derivative works of
16 | the Software that are made available under this License.
17 |
18 | The terms "reproduce," "reproduction," "derivative works," and
19 | "distribution" have the meaning as provided under U.S. copyright law;
20 | provided, however, that for the purposes of this License, derivative
21 | works shall not include works that remain separable from, or merely
22 | link (or bind by name) to the interfaces of, the Work.
23 |
24 | Works, including the Software, are "made available" under this License
25 | by including in or with the Work either (a) a copyright notice
26 | referencing the applicability of this License to the Work, or (b) a
27 | copy of this License.
28 |
29 | 2. License Grants
30 |
31 | 2.1 Copyright Grant. Subject to the terms and conditions of this
32 | License, each Licensor grants to you a perpetual, worldwide,
33 | non-exclusive, royalty-free, copyright license to reproduce,
34 | prepare derivative works of, publicly display, publicly perform,
35 | sublicense and distribute its Work and any resulting derivative
36 | works in any form.
37 |
38 | 3. Limitations
39 |
40 | 3.1 Redistribution. You may reproduce or distribute the Work only
41 | if (a) you do so under this License, (b) you include a complete
42 | copy of this License with your distribution, and (c) you retain
43 | without modification any copyright, patent, trademark, or
44 | attribution notices that are present in the Work.
45 |
46 | 3.2 Derivative Works. You may specify that additional or different
47 | terms apply to the use, reproduction, and distribution of your
48 | derivative works of the Work ("Your Terms") only if (a) Your Terms
49 | provide that the use limitation in Section 3.3 applies to your
50 | derivative works, and (b) you identify the specific derivative
51 | works that are subject to Your Terms. Notwithstanding Your Terms,
52 | this License (including the redistribution requirements in Section
53 | 3.1) will continue to apply to the Work itself.
54 |
55 | 3.3 Use Limitation. The Work and any derivative works thereof only
56 | may be used or intended for use non-commercially. The Work or
57 | derivative works thereof may be used or intended for use by Nvidia
58 | or its affiliates commercially or non-commercially. As used herein,
59 | "non-commercially" means for research or evaluation purposes only
60 | and not for any direct or indirect monetary gain.
61 |
62 | 3.4 Patent Claims. If you bring or threaten to bring a patent claim
63 | against any Licensor (including any claim, cross-claim or
64 | counterclaim in a lawsuit) to enforce any patents that you allege
65 | are infringed by any Work, then your rights under this License from
66 | such Licensor (including the grant in Section 2.1) will terminate
67 | immediately.
68 |
69 | 3.5 Trademarks. This License does not grant any rights to use any
70 | Licensor's or its affiliates' names, logos, or trademarks, except
71 | as necessary to reproduce the notices described in this License.
72 |
73 | 3.6 Termination. If you violate any term of this License, then your
74 | rights under this License (including the grant in Section 2.1) will
75 | terminate immediately.
76 |
77 | 4. Disclaimer of Warranty.
78 |
79 | THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY
80 | KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
81 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
82 | NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
83 | THIS LICENSE.
84 |
85 | 5. Limitation of Liability.
86 |
87 | EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
88 | THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
89 | SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
90 | INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
91 | OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK
92 | (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION,
93 | LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER
94 | COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF
95 | THE POSSIBILITY OF SUCH DAMAGES.
96 |
97 | =======================================================================
98 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Nvdiffrast – Modular Primitives for High-Performance Differentiable Rendering
2 |
3 | 
4 |
5 | **Modular Primitives for High-Performance Differentiable Rendering**
6 | Samuli Laine, Janne Hellsten, Tero Karras, Yeongho Seol, Jaakko Lehtinen, Timo Aila
7 | [http://arxiv.org/abs/2011.03277](http://arxiv.org/abs/2011.03277)
8 |
9 | Nvdiffrast is a PyTorch/TensorFlow library that provides high-performance primitive operations for rasterization-based differentiable rendering.
10 | Please refer to ☞☞ [nvdiffrast documentation](https://nvlabs.github.io/nvdiffrast) ☜☜ for more information.
11 |
12 | ## Licenses
13 |
14 | Copyright © 2020–2024, NVIDIA Corporation. All rights reserved.
15 |
16 | This work is made available under the [Nvidia Source Code License](https://github.com/NVlabs/nvdiffrast/blob/main/LICENSE.txt).
17 |
18 | For business inquiries, please visit our website and submit the form: [NVIDIA Research Licensing](https://www.nvidia.com/en-us/research/inquiries/)
19 |
20 | We do not currently accept outside code contributions in the form of pull requests.
21 |
22 | Environment map stored as part of `samples/data/envphong.npz` is derived from a Wave Engine
23 | [sample material](https://github.com/WaveEngine/Samples-2.5/tree/master/Materials/EnvironmentMap/Content/Assets/CubeMap.cubemap)
24 | originally shared under
25 | [MIT License](https://github.com/WaveEngine/Samples-2.5/blob/master/LICENSE.md).
26 | Mesh and texture stored as part of `samples/data/earth.npz` are derived from
27 | [3D Earth Photorealistic 2K](https://www.turbosquid.com/3d-models/3d-realistic-earth-photorealistic-2k-1279125)
28 | model originally made available under
29 | [TurboSquid 3D Model License](https://blog.turbosquid.com/turbosquid-3d-model-license/#3d-model-license).
30 |
31 | ## Citation
32 |
33 | ```
34 | @article{Laine2020diffrast,
35 | title = {Modular Primitives for High-Performance Differentiable Rendering},
36 | author = {Samuli Laine and Janne Hellsten and Tero Karras and Yeongho Seol and Jaakko Lehtinen and Timo Aila},
37 | journal = {ACM Transactions on Graphics},
38 | year = {2020},
39 | volume = {39},
40 | number = {6}
41 | }
42 | ```
43 |
--------------------------------------------------------------------------------
/docker/10_nvidia.json:
--------------------------------------------------------------------------------
1 | {
2 | "file_format_version" : "1.0.0",
3 | "ICD" : {
4 | "library_path" : "libEGL_nvidia.so.0"
5 | }
6 | }
7 |
--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | # Note: Should also work with NVIDIA's Docker image builds such as
10 | #
11 | # nvcr.io/nvidia/pytorch:20.09-py3
12 | #
13 | # This file defaults to pytorch/pytorch as it works on slightly older
14 | # driver versions.
15 | FROM nvcr.io/nvidia/pytorch:23.03-py3
16 |
17 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
18 | pkg-config \
19 | libglvnd0 \
20 | libgl1 \
21 | libglx0 \
22 | libegl1 \
23 | libgles2 \
24 | libglvnd-dev \
25 | libgl1-mesa-dev \
26 | libegl1-mesa-dev \
27 | libgles2-mesa-dev \
28 | cmake \
29 | curl
30 |
31 | ENV PYTHONDONTWRITEBYTECODE=1
32 | ENV PYTHONUNBUFFERED=1
33 |
34 | # for GLEW
35 | ENV LD_LIBRARY_PATH /usr/lib64:$LD_LIBRARY_PATH
36 |
37 | # nvidia-container-runtime
38 | ENV NVIDIA_VISIBLE_DEVICES all
39 | ENV NVIDIA_DRIVER_CAPABILITIES compute,utility,graphics
40 |
41 | # Default pyopengl to EGL for good headless rendering support
42 | ENV PYOPENGL_PLATFORM egl
43 |
44 | COPY docker/10_nvidia.json /usr/share/glvnd/egl_vendor.d/10_nvidia.json
45 |
46 | RUN pip install --upgrade pip
47 | RUN pip install ninja imageio imageio-ffmpeg
48 |
49 | COPY nvdiffrast /tmp/pip/nvdiffrast/
50 | COPY README.md setup.py /tmp/pip/
51 | RUN cd /tmp/pip && pip install .
52 |
--------------------------------------------------------------------------------
/docs/img/cube.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/cube.png
--------------------------------------------------------------------------------
/docs/img/earth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/earth.png
--------------------------------------------------------------------------------
/docs/img/envphong.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/envphong.png
--------------------------------------------------------------------------------
/docs/img/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/logo.png
--------------------------------------------------------------------------------
/docs/img/pipe_cube.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/pipe_cube.png
--------------------------------------------------------------------------------
/docs/img/pipe_earth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/pipe_earth.png
--------------------------------------------------------------------------------
/docs/img/pipe_envphong.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/pipe_envphong.png
--------------------------------------------------------------------------------
/docs/img/pose.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/pose.png
--------------------------------------------------------------------------------
/docs/img/spot_aa.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/spot_aa.png
--------------------------------------------------------------------------------
/docs/img/spot_crop1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/spot_crop1.png
--------------------------------------------------------------------------------
/docs/img/spot_crop2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/spot_crop2.png
--------------------------------------------------------------------------------
/docs/img/spot_diff1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/spot_diff1.png
--------------------------------------------------------------------------------
/docs/img/spot_diff2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/spot_diff2.png
--------------------------------------------------------------------------------
/docs/img/spot_peel1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/spot_peel1.png
--------------------------------------------------------------------------------
/docs/img/spot_peel2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/spot_peel2.png
--------------------------------------------------------------------------------
/docs/img/spot_st.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/spot_st.png
--------------------------------------------------------------------------------
/docs/img/spot_tex.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/spot_tex.png
--------------------------------------------------------------------------------
/docs/img/spot_texture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/spot_texture.png
--------------------------------------------------------------------------------
/docs/img/spot_texw.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/spot_texw.png
--------------------------------------------------------------------------------
/docs/img/spot_tri.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/spot_tri.png
--------------------------------------------------------------------------------
/docs/img/spot_uv.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/spot_uv.png
--------------------------------------------------------------------------------
/docs/img/teaser.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/teaser.png
--------------------------------------------------------------------------------
/docs/img/teaser1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/teaser1.png
--------------------------------------------------------------------------------
/docs/img/teaser2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/teaser2.png
--------------------------------------------------------------------------------
/docs/img/teaser3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/teaser3.png
--------------------------------------------------------------------------------
/docs/img/teaser4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/teaser4.png
--------------------------------------------------------------------------------
/docs/img/teaser5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/teaser5.png
--------------------------------------------------------------------------------
/docs/img/thumb.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/thumb.jpg
--------------------------------------------------------------------------------
/docs/img/tri.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/docs/img/tri.png
--------------------------------------------------------------------------------
/nvdiffrast/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | __version__ = '0.3.3'
10 |
--------------------------------------------------------------------------------
/nvdiffrast/common/antialias.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #pragma once
10 | #include "common.h"
11 |
12 | //------------------------------------------------------------------------
13 | // Constants and helpers.
14 |
15 | #define AA_DISCONTINUITY_KERNEL_BLOCK_WIDTH 32
16 | #define AA_DISCONTINUITY_KERNEL_BLOCK_HEIGHT 8
17 | #define AA_ANALYSIS_KERNEL_THREADS_PER_BLOCK 256
18 | #define AA_MESH_KERNEL_THREADS_PER_BLOCK 256
19 | #define AA_HASH_ELEMENTS_PER_TRIANGLE(alloc) ((alloc) >= (2 << 25) ? 4 : 8) // With more than 16777216 triangles (alloc >= 33554432) use smallest possible value of 4 to conserve memory, otherwise use 8 for fewer collisions.
20 | #define AA_LOG_HASH_ELEMENTS_PER_TRIANGLE(alloc) ((alloc) >= (2 << 25) ? 2 : 3)
21 | #define AA_GRAD_KERNEL_THREADS_PER_BLOCK 256
22 |
23 | //------------------------------------------------------------------------
24 | // CUDA kernel params.
25 |
26 | struct AntialiasKernelParams
27 | {
28 | const float* color; // Incoming color buffer.
29 | const float* rasterOut; // Incoming rasterizer output buffer.
30 | const int* tri; // Incoming triangle buffer.
31 | const float* pos; // Incoming position buffer.
32 | float* output; // Output buffer of forward kernel.
33 | const float* dy; // Incoming gradients.
34 | float* gradColor; // Output buffer, color gradient.
35 | float* gradPos; // Output buffer, position gradient.
36 | int4* workBuffer; // Buffer for storing intermediate work items. First item reserved for counters.
37 | uint4* evHash; // Edge-vertex hash.
38 | int allocTriangles; // Number of triangles accommodated by evHash. Always power of two.
39 | int numTriangles; // Number of triangles.
40 | int numVertices; // Number of vertices.
41 | int width; // Input width.
42 | int height; // Input height.
43 | int n; // Minibatch size.
44 | int channels; // Channel count in color input.
45 | float xh, yh; // Transfer to pixel space.
46 | int instance_mode; // 0=normal, 1=instance mode.
47 | int tri_const; // 1 if triangle array is known to be constant.
48 | };
49 |
50 | //------------------------------------------------------------------------
51 |
--------------------------------------------------------------------------------
/nvdiffrast/common/common.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #include <cuda_runtime.h>
10 |
11 | //------------------------------------------------------------------------
12 | // Block and grid size calculators for kernel launches.
13 |
14 | dim3 getLaunchBlockSize(int maxWidth, int maxHeight, int width, int height)
15 | {
16 | int maxThreads = maxWidth * maxHeight;
17 | if (maxThreads <= 1 || (width * height) <= 1)
18 | return dim3(1, 1, 1); // Degenerate.
19 |
20 | // Start from max size.
21 | int bw = maxWidth;
22 | int bh = maxHeight;
23 |
24 | // Optimizations for weirdly sized buffers.
25 | if (width < bw)
26 | {
27 | // Decrease block width to smallest power of two that covers the buffer width.
28 | while ((bw >> 1) >= width)
29 | bw >>= 1;
30 |
31 | // Maximize height.
32 | bh = maxThreads / bw;
33 | if (bh > height)
34 | bh = height;
35 | }
36 | else if (height < bh)
37 | {
38 | // Halve height and double width until fits completely inside buffer vertically.
39 | while (bh > height)
40 | {
41 | bh >>= 1;
42 | if (bw < width)
43 | bw <<= 1;
44 | }
45 | }
46 |
47 | // Done.
48 | return dim3(bw, bh, 1);
49 | }
50 |
51 | dim3 getLaunchGridSize(dim3 blockSize, int width, int height, int depth)
52 | {
53 | dim3 gridSize;
54 | gridSize.x = (width - 1) / blockSize.x + 1;
55 | gridSize.y = (height - 1) / blockSize.y + 1;
56 | gridSize.z = (depth - 1) / blockSize.z + 1;
57 | return gridSize;
58 | }
59 |
60 | //------------------------------------------------------------------------
61 |
--------------------------------------------------------------------------------
/nvdiffrast/common/cudaraster/CudaRaster.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2009-2022, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #pragma once
10 |
11 | //------------------------------------------------------------------------
12 | // This is a slimmed-down and modernized version of the original
13 | // CudaRaster codebase that accompanied the HPG 2011 paper
14 | // "High-Performance Software Rasterization on GPUs" by Laine and Karras.
15 | // Modifications have been made to accommodate post-Volta execution model
16 | // with warp divergence. Support for shading, blending, quad rendering,
17 | // and supersampling have been removed as unnecessary for nvdiffrast.
18 | //------------------------------------------------------------------------
19 |
20 | namespace CR
21 | {
22 |
23 | class RasterImpl;
24 |
25 | //------------------------------------------------------------------------
26 | // Interface class to isolate user from implementation details.
27 | //------------------------------------------------------------------------
28 |
29 | class CudaRaster
30 | {
31 | public:
32 | enum
33 | {
34 | RenderModeFlag_EnableBackfaceCulling = 1 << 0, // Enable backface culling.
35 | RenderModeFlag_EnableDepthPeeling = 1 << 1, // Enable depth peeling. Must have a peel buffer set.
36 | };
37 |
38 | public:
39 | CudaRaster (void);
40 | ~CudaRaster (void);
41 |
42 | void setBufferSize (int width, int height, int numImages); // Width and height are internally rounded up to multiples of tile size (8x8) for buffer sizes.
43 | void setViewport (int width, int height, int offsetX, int offsetY); // Tiled rendering viewport setup.
44 | void setRenderModeFlags (unsigned int renderModeFlags); // Affects all subsequent calls to drawTriangles(). Defaults to zero.
45 | void deferredClear (unsigned int clearColor); // Clears color and depth buffers during next call to drawTriangles().
46 | void setVertexBuffer (void* vertices, int numVertices); // GPU pointer managed by caller. Vertex positions in clip space as float4 (x, y, z, w).
47 | void setIndexBuffer (void* indices, int numTriangles); // GPU pointer managed by caller. Triangle index+color quadruplets as uint4 (idx0, idx1, idx2, color).
48 | bool drawTriangles (const int* ranges, bool peel, cudaStream_t stream); // Ranges (offsets and counts) as #triangles entries, not as bytes. If NULL, draw all triangles. Returns false in case of internal overflow.
49 | void* getColorBuffer (void); // GPU pointer managed by CudaRaster.
50 | void* getDepthBuffer (void); // GPU pointer managed by CudaRaster.
51 | void swapDepthAndPeel (void); // Swap depth and peeling buffers.
52 |
53 | private:
54 | CudaRaster (const CudaRaster&); // forbidden
55 | CudaRaster& operator= (const CudaRaster&); // forbidden
56 |
57 | private:
58 | RasterImpl* m_impl; // Opaque pointer to implementation.
59 | };
60 |
61 | //------------------------------------------------------------------------
62 | } // namespace CR
63 |
64 |
--------------------------------------------------------------------------------
/nvdiffrast/common/cudaraster/impl/Buffer.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2009-2022, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #include "../../framework.h"
10 | #include "Buffer.hpp"
11 |
12 | using namespace CR;
13 |
14 | //------------------------------------------------------------------------
15 | // GPU buffer.
16 | //------------------------------------------------------------------------
17 |
18 | Buffer::Buffer(void)
19 | : m_gpuPtr(NULL),
20 | m_bytes (0)
21 | {
22 | // empty
23 | }
24 |
25 | Buffer::~Buffer(void)
26 | {
27 | if (m_gpuPtr)
28 | cudaFree(m_gpuPtr); // Don't throw an exception.
29 | }
30 |
31 | void Buffer::reset(size_t bytes)
32 | {
33 | if (bytes == m_bytes)
34 | return;
35 |
36 | if (m_gpuPtr)
37 | {
38 | NVDR_CHECK_CUDA_ERROR(cudaFree(m_gpuPtr));
39 | m_gpuPtr = NULL;
40 | }
41 |
42 | if (bytes > 0)
43 | NVDR_CHECK_CUDA_ERROR(cudaMalloc(&m_gpuPtr, bytes));
44 |
45 | m_bytes = bytes;
46 | }
47 |
48 | void Buffer::grow(size_t bytes)
49 | {
50 | if (bytes > m_bytes)
51 | reset(bytes);
52 | }
53 |
54 | //------------------------------------------------------------------------
55 | // Host buffer with page-locked memory.
56 | //------------------------------------------------------------------------
57 |
58 | HostBuffer::HostBuffer(void)
59 | : m_hostPtr(NULL),
60 | m_bytes (0)
61 | {
62 | // empty
63 | }
64 |
65 | HostBuffer::~HostBuffer(void)
66 | {
67 | if (m_hostPtr)
68 | cudaFreeHost(m_hostPtr); // Don't throw an exception.
69 | }
70 |
71 | void HostBuffer::reset(size_t bytes)
72 | {
73 | if (bytes == m_bytes)
74 | return;
75 |
76 | if (m_hostPtr)
77 | {
78 | NVDR_CHECK_CUDA_ERROR(cudaFreeHost(m_hostPtr));
79 | m_hostPtr = NULL;
80 | }
81 |
82 | if (bytes > 0)
83 | NVDR_CHECK_CUDA_ERROR(cudaMallocHost(&m_hostPtr, bytes));
84 |
85 | m_bytes = bytes;
86 | }
87 |
88 | void HostBuffer::grow(size_t bytes)
89 | {
90 | if (bytes > m_bytes)
91 | reset(bytes);
92 | }
93 |
94 | //------------------------------------------------------------------------
95 |
--------------------------------------------------------------------------------
/nvdiffrast/common/cudaraster/impl/Buffer.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2009-2022, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #pragma once
10 | #include "Defs.hpp"
11 |
12 | namespace CR
13 | {
14 | //------------------------------------------------------------------------
15 |
// Owner of a single CUDA device-memory allocation, sized in bytes.
class Buffer
{
public:
                    Buffer          (void);     // Starts empty; no allocation.
                    ~Buffer         (void);     // Frees the allocation; never throws.

    void            reset           (size_t bytes);     // Reallocate to exactly 'bytes'; old contents are lost.
    void            grow            (size_t bytes);     // Reallocate only if 'bytes' exceeds the current size.
    void*           getPtr          (size_t offset = 0) { return (void*)(((uintptr_t)m_gpuPtr) + offset); }     // Device pointer at byte offset.
    size_t          getSize         (void) const { return m_bytes; }

    // NOTE(review): overwrites the pointer without freeing the previous
    // allocation and without updating m_bytes -- caller manages ownership.
    void            setPtr          (void* ptr) { m_gpuPtr = ptr; }

private:
    void*           m_gpuPtr;       // Device pointer from cudaMalloc; NULL when empty.
    size_t          m_bytes;        // Current allocation size in bytes.
};
33 |
34 | //------------------------------------------------------------------------
35 |
// Owner of a single page-locked (cudaMallocHost) host-memory allocation.
class HostBuffer
{
public:
                    HostBuffer      (void);     // Starts empty; no allocation.
                    ~HostBuffer     (void);     // Frees the allocation; never throws.

    void            reset           (size_t bytes);     // Reallocate to exactly 'bytes'; old contents are lost.
    void            grow            (size_t bytes);     // Reallocate only if 'bytes' exceeds the current size.
    void*           getPtr          (void) { return m_hostPtr; }
    size_t          getSize         (void) const { return m_bytes; }

    // NOTE(review): overwrites the pointer without freeing the previous
    // allocation and without updating m_bytes -- caller manages ownership.
    void            setPtr          (void* ptr) { m_hostPtr = ptr; }

private:
    void*           m_hostPtr;      // Host pointer from cudaMallocHost; NULL when empty.
    size_t          m_bytes;        // Current allocation size in bytes.
};
53 |
54 | //------------------------------------------------------------------------
55 | }
56 |
--------------------------------------------------------------------------------
/nvdiffrast/common/cudaraster/impl/Constants.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2009-2022, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #pragma once
10 |
11 | //------------------------------------------------------------------------
12 |
// All sizes below are expressed as log2 values; the *_SIZE / *_SQR macros
// further down expand them to actual powers of two.
#define CR_MAXVIEWPORT_LOG2     11      // ViewportSize / PixelSize.
#define CR_SUBPIXEL_LOG2        4       // PixelSize / SubpixelSize.

#define CR_MAXBINS_LOG2         4       // ViewportSize / BinSize.
#define CR_BIN_LOG2             4       // BinSize / TileSize.
#define CR_TILE_LOG2            3       // TileSize / PixelSize.

// Coverage LUT parameters. The CR_FLIPBIT_* values look like bit indices
// used when addressing the 8x8 coverage table -- confirm in the raster .inl files.
#define CR_COVER8X8_LUT_SIZE    768     // 64-bit entries.
#define CR_FLIPBIT_FLIP_Y       2
#define CR_FLIPBIT_FLIP_X       3
#define CR_FLIPBIT_SWAP_XY      4
#define CR_FLIPBIT_COMPL        5

#define CR_BIN_STREAMS_LOG2     4
#define CR_BIN_SEG_LOG2         9       // 32-bit entries.
#define CR_TILE_SEG_LOG2        5       // 32-bit entries.

#define CR_MAXSUBTRIS_LOG2      24      // Triangle structs. Dictated by CoarseRaster.
#define CR_COARSE_QUEUE_LOG2    10      // Triangles.

// Kernel launch configuration; referenced by the __launch_bounds__
// declarations in RasterImpl.cu.
#define CR_SETUP_WARPS          2
#define CR_SETUP_OPT_BLOCKS     8
#define CR_BIN_WARPS            16
#define CR_COARSE_WARPS         16      // Must be a power of two.
#define CR_FINE_MAX_WARPS       20

#define CR_EMBED_IMAGE_PARAMS   32      // Number of per-image parameter structs embedded in kernel launch parameter block.

//------------------------------------------------------------------------
// Derived quantities: each *_SIZE is 2^(*_LOG2), each *_SQR its square.

#define CR_MAXVIEWPORT_SIZE     (1 << CR_MAXVIEWPORT_LOG2)
#define CR_SUBPIXEL_SIZE        (1 << CR_SUBPIXEL_LOG2)
#define CR_SUBPIXEL_SQR         (1 << (CR_SUBPIXEL_LOG2 * 2))

#define CR_MAXBINS_SIZE         (1 << CR_MAXBINS_LOG2)
#define CR_MAXBINS_SQR          (1 << (CR_MAXBINS_LOG2 * 2))
#define CR_BIN_SIZE             (1 << CR_BIN_LOG2)
#define CR_BIN_SQR              (1 << (CR_BIN_LOG2 * 2))

#define CR_MAXTILES_LOG2        (CR_MAXBINS_LOG2 + CR_BIN_LOG2)
#define CR_MAXTILES_SIZE        (1 << CR_MAXTILES_LOG2)
#define CR_MAXTILES_SQR         (1 << (CR_MAXTILES_LOG2 * 2))
#define CR_TILE_SIZE            (1 << CR_TILE_LOG2)
#define CR_TILE_SQR             (1 << (CR_TILE_LOG2 * 2))

#define CR_BIN_STREAMS_SIZE     (1 << CR_BIN_STREAMS_LOG2)
#define CR_BIN_SEG_SIZE         (1 << CR_BIN_SEG_LOG2)
#define CR_TILE_SEG_SIZE        (1 << CR_TILE_SEG_LOG2)

#define CR_MAXSUBTRIS_SIZE      (1 << CR_MAXSUBTRIS_LOG2)
#define CR_COARSE_QUEUE_SIZE    (1 << CR_COARSE_QUEUE_LOG2)

//------------------------------------------------------------------------
// When evaluating interpolated Z pixel centers, we may introduce an error
// of (+-CR_LERP_ERROR) ULPs. The usable depth range is shrunk accordingly
// at both ends of the U32 range.

#define CR_LERP_ERROR(SAMPLES_LOG2) (2200u << (SAMPLES_LOG2))
#define CR_DEPTH_MIN                CR_LERP_ERROR(3)
#define CR_DEPTH_MAX                (CR_U32_MAX - CR_LERP_ERROR(3))
72 |
73 | //------------------------------------------------------------------------
74 |
--------------------------------------------------------------------------------
/nvdiffrast/common/cudaraster/impl/CudaRaster.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2009-2022, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #include "Defs.hpp"
10 | #include "../CudaRaster.hpp"
11 | #include "RasterImpl.hpp"
12 |
13 | using namespace CR;
14 |
15 | //------------------------------------------------------------------------
16 | // Stub interface implementation.
17 | //------------------------------------------------------------------------
18 |
// Construct the public facade; all state and buffers live in RasterImpl.
CudaRaster::CudaRaster()
{
    m_impl = new RasterImpl();
}
23 |
CudaRaster::~CudaRaster()
{
    // Exclusive owner of the implementation object.
    delete m_impl;
}
28 |
// Set the internal framebuffer size: width x height pixels, numImages deep.
void CudaRaster::setBufferSize(int width, int height, int numImages)
{
    m_impl->setBufferSize(Vec3i(width, height, numImages));
}
33 |
// Set viewport size and offset (offset is used for tiled rendering,
// per the m_offsetPixels comment in RasterImpl.hpp).
void CudaRaster::setViewport(int width, int height, int offsetX, int offsetY)
{
    m_impl->setViewport(Vec2i(width, height), Vec2i(offsetX, offsetY));
}
38 |
// Store render mode flags; forwarded into CRParams::renderModeFlags at draw time.
void CudaRaster::setRenderModeFlags(U32 flags)
{
    m_impl->setRenderModeFlags(flags);
}
43 |
// Request that the framebuffer be cleared to clearColor before the next draw.
void CudaRaster::deferredClear(U32 clearColor)
{
    m_impl->deferredClear(clearColor);
}
48 |
// Set the vertex buffer (GPU pointer, per RasterImpl.hpp). No copy is made.
void CudaRaster::setVertexBuffer(void* vertices, int numVertices)
{
    m_impl->setVertexBuffer(vertices, numVertices);
}
53 |
// Set the triangle index buffer (GPU pointer, per RasterImpl.hpp). No copy is made.
void CudaRaster::setIndexBuffer(void* indices, int numTriangles)
{
    m_impl->setIndexBuffer(indices, numTriangles);
}
58 |
// Draw triangles on the given stream. 'ranges' is reinterpreted as an array
// of Vec2i pairs; 'peel' enables depth peeling. Return value semantics are
// defined by RasterImpl::drawTriangles -- see RasterImpl.cpp.
bool CudaRaster::drawTriangles(const int* ranges, bool peel, cudaStream_t stream)
{
    return m_impl->drawTriangles((const Vec2i*)ranges, peel, stream);
}
63 |
// Return the internal color buffer (GPU pointer, per RasterImpl.hpp).
void* CudaRaster::getColorBuffer(void)
{
    return m_impl->getColorBuffer();
}
68 |
// Return the internal depth buffer (GPU pointer, per RasterImpl.hpp).
void* CudaRaster::getDepthBuffer(void)
{
    return m_impl->getDepthBuffer();
}
73 |
// Swap the roles of the depth and peel buffers (used during depth peeling).
void CudaRaster::swapDepthAndPeel(void)
{
    m_impl->swapDepthAndPeel();
}
78 |
79 | //------------------------------------------------------------------------
80 |
--------------------------------------------------------------------------------
/nvdiffrast/common/cudaraster/impl/Defs.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2009-2022, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #pragma once
#include <cstddef>
#include <cstdint>
12 |
13 | namespace CR
14 | {
15 | //------------------------------------------------------------------------
16 |
// Guarantee NULL is defined even when no stddef-providing header precedes us.
#ifndef NULL
#   define NULL 0
#endif

// CR_CUDA = 1 when compiled by nvcc, 0 for plain host compilation.
#ifdef __CUDACC__
#   define CR_CUDA 1
#else
#   define CR_CUDA 0
#endif

// Qualifiers that map to device code under nvcc and to plain host code otherwise.
#if CR_CUDA
#   define CR_CUDA_FUNC     __device__ __inline__
#   define CR_CUDA_CONST    __constant__
#else
#   define CR_CUDA_FUNC     inline
#   define CR_CUDA_CONST    static const
#endif

#define CR_UNREF(X)         ((void)(X))                                 // Silence unused-variable warnings.
#define CR_ARRAY_SIZE(X)    ((int)(sizeof(X) / sizeof((X)[0])))         // Element count of a C array.

//------------------------------------------------------------------------

// Fixed-width shorthand types used throughout the rasterizer.
typedef uint8_t             U8;
typedef uint16_t            U16;
typedef uint32_t            U32;
typedef uint64_t            U64;
typedef int8_t              S8;
typedef int16_t             S16;
typedef int32_t             S32;
typedef int64_t             S64;
typedef float               F32;
typedef double              F64;
typedef void                (*FuncPtr)(void);

//------------------------------------------------------------------------
// Numeric limits.
// NOTE(review): CR_S64_MIN left-shifts a negative value, which is
// implementation-defined before C++20; all supported compilers produce
// INT64_MIN here, but worth confirming if a new toolchain is added.

#define CR_U32_MAX          (0xFFFFFFFFu)
#define CR_S32_MIN          (~0x7FFFFFFF)
#define CR_S32_MAX          (0x7FFFFFFF)
#define CR_U64_MAX          ((U64)(S64)-1)
#define CR_S64_MIN          ((S64)-1 << 63)
#define CR_S64_MAX          (~((S64)-1 << 63))
#define CR_F32_MIN          (1.175494351e-38f)
#define CR_F32_MAX          (3.402823466e+38f)
#define CR_F64_MIN          (2.2250738585072014e-308)
#define CR_F64_MAX          (1.7976931348623158e+308)
65 | //------------------------------------------------------------------------
66 | // Misc types.
67 |
// Minimal 2-component integer vector: pure storage, no operations.
class Vec2i
{
public:
    Vec2i(int x_, int y_) { x = x_; y = y_; }
    int x, y;
};
74 |
// Minimal 3-component integer vector: pure storage, no operations.
class Vec3i
{
public:
    Vec3i(int x_, int y_, int z_) { x = x_; y = y_; z = z_; }
    int x, y, z;
};
81 |
//------------------------------------------------------------------------
// CUDA utilities.

#if CR_CUDA
// Linear thread index across the whole launch: flattens a 2D thread block
// (threadIdx.x/y) within a 2D grid (blockIdx.x/y).
#   define globalThreadIdx (threadIdx.x + blockDim.x * (threadIdx.y + blockDim.y * (blockIdx.x + gridDim.x * blockIdx.y)))
#endif
88 |
89 | //------------------------------------------------------------------------
90 | } // namespace CR
91 |
--------------------------------------------------------------------------------
/nvdiffrast/common/cudaraster/impl/PrivateDefs.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2009-2022, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #pragma once
10 | #include "Defs.hpp"
11 | #include "Constants.hpp"
12 |
13 | namespace CR
14 | {
15 | //------------------------------------------------------------------------
16 | // Projected triangle.
17 | //------------------------------------------------------------------------
18 |
// Compact per-(sub)triangle record produced by triangle setup and consumed
// by the later raster stages.
struct CRTriangleHeader
{
    S16 v0x;        // Subpixels relative to viewport center. Valid if triSubtris = 1.
    S16 v0y;
    S16 v1x;
    S16 v1y;
    S16 v2x;
    S16 v2y;

    U32 misc;       // triSubtris=1: (zmin:20, f01:4, f12:4, f20:4), triSubtris>=2: (subtriBase)
};
30 |
31 | //------------------------------------------------------------------------
32 |
// Per-triangle depth-plane coefficients (fixed-point, see CR_DEPTH_MIN/MAX
// in Constants.hpp) plus the application-visible triangle id.
struct CRTriangleData
{
    U32 zx;         // zx * sampleX + zy * sampleY + zb = lerp(CR_DEPTH_MIN, CR_DEPTH_MAX, (clipZ / clipW + 1) / 2)
    U32 zy;
    U32 zb;
    U32 id;         // Triangle id.
};
40 |
41 | //------------------------------------------------------------------------
42 | // Device-side structures.
43 | //------------------------------------------------------------------------
44 |
// Work counters, one struct per image in the batch (CRParams::atomics).
// The "= N" comments appear to document the expected value before each
// stage runs -- confirm against the initialization in RasterImpl.cpp.
struct CRAtomics
{
    // Setup.
    S32 numSubtris;         // = numTris

    // Bin.
    S32 binCounter;         // = 0
    S32 numBinSegs;         // = 0

    // Coarse.
    S32 coarseCounter;      // = 0
    S32 numTileSegs;        // = 0
    S32 numActiveTiles;     // = 0

    // Fine.
    S32 fineCounter;        // = 0
};
62 |
63 | //------------------------------------------------------------------------
64 |
// Per-image draw parameters; the first CR_EMBED_IMAGE_PARAMS of these are
// embedded directly in CRParams, the rest live in imageParamsExtra.
struct CRImageParams
{
    S32 triOffset;      // First triangle index to draw.
    S32 triCount;       // Number of triangles to draw.
    S32 binBatchSize;   // Number of triangles per batch.
};
71 |
72 | //------------------------------------------------------------------------
73 |
// Monolithic parameter block passed by value to every pipeline stage kernel
// (see RasterImpl.cu). Buffer pointers are set up by RasterImpl.
struct CRParams
{
    // Common.

    CRAtomics*      atomics;            // Work counters. Per-image.
    S32             numImages;          // Batch size.
    S32             totalCount;         // In range mode, total number of triangles to render.
    S32             instanceMode;       // 0 = range mode, 1 = instance mode.

    S32             numVertices;        // Number of vertices in input buffer, not counting multiples in instance mode.
    S32             numTriangles;       // Number of triangles in input buffer.
    void*           vertexBuffer;       // numVertices * float4(x, y, z, w)
    void*           indexBuffer;        // numTriangles * int3(vi0, vi1, vi2)

    S32             widthPixels;        // Render buffer size in pixels. Must be multiple of tile size (8x8).
    S32             heightPixels;
    S32             widthPixelsVp;      // Viewport size in pixels.
    S32             heightPixelsVp;
    S32             widthBins;          // widthPixels / CR_BIN_SIZE
    S32             heightBins;         // heightPixels / CR_BIN_SIZE
    S32             numBins;            // widthBins * heightBins

    F32             xs;                 // Vertex position adjustments for tiled rendering.
    F32             ys;
    F32             xo;
    F32             yo;

    S32             widthTiles;         // widthPixels / CR_TILE_SIZE
    S32             heightTiles;        // heightPixels / CR_TILE_SIZE
    S32             numTiles;           // widthTiles * heightTiles

    U32             renderModeFlags;
    S32             deferredClear;      // 1 = Clear framebuffer before rendering triangles.
    U32             clearColor;
    U32             clearDepth;

    // These are uniform across batch.

    S32             maxSubtris;
    S32             maxBinSegs;
    S32             maxTileSegs;

    // Setup output / bin input.

    void*           triSubtris;         // maxSubtris * U8
    void*           triHeader;          // maxSubtris * CRTriangleHeader
    void*           triData;            // maxSubtris * CRTriangleData

    // Bin output / coarse input.

    void*           binSegData;         // maxBinSegs * CR_BIN_SEG_SIZE * S32
    void*           binSegNext;         // maxBinSegs * S32
    void*           binSegCount;        // maxBinSegs * S32
    void*           binFirstSeg;        // CR_MAXBINS_SQR * CR_BIN_STREAMS_SIZE * (S32 segIdx), -1 = none
    void*           binTotal;           // CR_MAXBINS_SQR * CR_BIN_STREAMS_SIZE * (S32 numTris)

    // Coarse output / fine input.

    void*           tileSegData;        // maxTileSegs * CR_TILE_SEG_SIZE * S32
    void*           tileSegNext;        // maxTileSegs * S32
    void*           tileSegCount;       // maxTileSegs * S32
    void*           activeTiles;        // CR_MAXTILES_SQR * (S32 tileIdx)
    void*           tileFirstSeg;       // CR_MAXTILES_SQR * (S32 segIdx), -1 = none

    // Surface buffers. Outer tile offset is baked into pointers.

    void*           colorBuffer;        // sizePixels.x * sizePixels.y * numImages * U32
    void*           depthBuffer;        // sizePixels.x * sizePixels.y * numImages * U32
    void*           peelBuffer;         // sizePixels.x * sizePixels.y * numImages * U32, only if peeling enabled.
    S32             strideX;            // horizontal size in pixels
    S32             strideY;            // vertical stride in pixels

    // Per-image parameters for first images are embedded here to avoid extra memcpy for small batches.

    CRImageParams           imageParamsFirst[CR_EMBED_IMAGE_PARAMS];
    const CRImageParams*    imageParamsExtra;   // After CR_EMBED_IMAGE_PARAMS.
};
151 |
152 | //------------------------------------------------------------------------
153 | }
154 |
--------------------------------------------------------------------------------
/nvdiffrast/common/cudaraster/impl/RasterImpl.cu:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2009-2022, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #include "../CudaRaster.hpp"
10 | #include "PrivateDefs.hpp"
11 | #include "Constants.hpp"
12 | #include "Util.inl"
13 |
14 | namespace CR
15 | {
16 |
17 | //------------------------------------------------------------------------
18 | // Stage implementations.
19 | //------------------------------------------------------------------------
20 |
21 | #include "TriangleSetup.inl"
22 | #include "BinRaster.inl"
23 | #include "CoarseRaster.inl"
24 | #include "FineRaster.inl"
25 |
26 | }
27 |
28 | //------------------------------------------------------------------------
29 | // Stage entry points.
30 | //------------------------------------------------------------------------
31 |
// One thin __global__ entry point per pipeline stage; each simply forwards
// the parameter block to the corresponding *Impl in namespace CR. Launch
// bounds come from the warp-count constants in Constants.hpp.
__global__ void __launch_bounds__(CR_SETUP_WARPS * 32, CR_SETUP_OPT_BLOCKS) triangleSetupKernel (const CR::CRParams p) { CR::triangleSetupImpl(p); }
__global__ void __launch_bounds__(CR_BIN_WARPS * 32, 1)                     binRasterKernel     (const CR::CRParams p) { CR::binRasterImpl(p); }
__global__ void __launch_bounds__(CR_COARSE_WARPS * 32, 1)                  coarseRasterKernel  (const CR::CRParams p) { CR::coarseRasterImpl(p); }
__global__ void __launch_bounds__(CR_FINE_MAX_WARPS * 32, 1)                fineRasterKernel    (const CR::CRParams p) { CR::fineRasterImpl(p); }
36 |
37 | //------------------------------------------------------------------------
38 |
--------------------------------------------------------------------------------
/nvdiffrast/common/cudaraster/impl/RasterImpl.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2009-2022, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #pragma once
10 | #include "PrivateDefs.hpp"
11 | #include "Buffer.hpp"
12 | #include "../CudaRaster.hpp"
13 |
14 | namespace CR
15 | {
16 | //------------------------------------------------------------------------
17 |
// Private implementation behind the public CudaRaster facade (see
// CudaRaster.cpp). Owns all surface and intermediate buffers and launches
// the rasterization stage kernels.
class RasterImpl
{
public:
                    RasterImpl              (void);
                    ~RasterImpl             (void);

    void            setBufferSize           (Vec3i size);
    void            setViewport             (Vec2i size, Vec2i offset);
    void            setRenderModeFlags      (U32 flags) { m_renderModeFlags = flags; }
    void            deferredClear           (U32 color) { m_deferredClear = true; m_clearColor = color; }
    void            setVertexBuffer         (void* ptr, int numVertices) { m_vertexPtr = ptr; m_numVertices = numVertices; } // GPU pointer.
    void            setIndexBuffer          (void* ptr, int numTriangles) { m_indexPtr = ptr; m_numTriangles = numTriangles; } // GPU pointer.
    bool            drawTriangles           (const Vec2i* ranges, bool peel, cudaStream_t stream);
    void*           getColorBuffer          (void) { return m_colorBuffer.getPtr(); } // GPU pointer.
    void*           getDepthBuffer          (void) { return m_depthBuffer.getPtr(); } // GPU pointer.
    void            swapDepthAndPeel        (void);
    size_t          getTotalBufferSizes     (void) const;

private:
    // Launches the setup/bin/coarse/fine stage kernels for one draw.
    void            launchStages            (bool instanceMode, bool peel, cudaStream_t stream);

    // State.

    unsigned int    m_renderModeFlags;
    bool            m_deferredClear;
    unsigned int    m_clearColor;
    void*           m_vertexPtr;            // Caller-owned GPU pointer; not freed here.
    void*           m_indexPtr;             // Caller-owned GPU pointer; not freed here.
    int             m_numVertices;          // Input buffer size.
    int             m_numTriangles;         // Input buffer size.
    size_t          m_bufferSizesReported;  // Previously reported buffer sizes.

    // Surfaces.

    Buffer          m_colorBuffer;
    Buffer          m_depthBuffer;
    Buffer          m_peelBuffer;
    int             m_numImages;
    Vec2i           m_bufferSizePixels;     // Internal buffer size.
    Vec2i           m_bufferSizeVp;         // Total viewport size.
    Vec2i           m_sizePixels;           // Internal size at which all computation is done, buffers reserved, etc.
    Vec2i           m_sizeVp;               // Size to which output will be cropped outside, determines viewport size.
    Vec2i           m_offsetPixels;         // Viewport offset for tiled rendering.
    Vec2i           m_sizeBins;
    S32             m_numBins;
    Vec2i           m_sizeTiles;
    S32             m_numTiles;

    // Launch sizes etc.

    S32             m_numSMs;
    S32             m_numCoarseBlocksPerSM;
    S32             m_numFineBlocksPerSM;
    S32             m_numFineWarpsPerBlock;

    // Global intermediate buffers. Individual images have offsets to these.

    Buffer          m_crAtomics;
    HostBuffer      m_crAtomicsHost;
    HostBuffer      m_crImageParamsHost;
    Buffer          m_crImageParamsExtra;
    Buffer          m_triSubtris;
    Buffer          m_triHeader;
    Buffer          m_triData;
    Buffer          m_binFirstSeg;
    Buffer          m_binTotal;
    Buffer          m_binSegData;
    Buffer          m_binSegNext;
    Buffer          m_binSegCount;
    Buffer          m_activeTiles;
    Buffer          m_tileFirstSeg;
    Buffer          m_tileSegData;
    Buffer          m_tileSegNext;
    Buffer          m_tileSegCount;

    // Actual buffer sizes.

    S32             m_maxSubtris;
    S32             m_maxBinSegs;
    S32             m_maxTileSegs;
};
99 |
100 | //------------------------------------------------------------------------
101 | } // namespace CR
102 |
103 |
--------------------------------------------------------------------------------
/nvdiffrast/common/framework.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #pragma once
10 |
11 | // Framework-specific macros to enable code sharing.
12 |
13 | //------------------------------------------------------------------------
14 | // Tensorflow.
15 |
#ifdef NVDR_TENSORFLOW
// TensorFlow build: the NVDR_* helper macros route errors through TF's
// op-kernel reporting (OP_REQUIRES and friends) using an OpKernelContext.
#define EIGEN_USE_GPU
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/platform/default/logging.h"
using namespace tensorflow;
using namespace tensorflow::shape_inference;
#define NVDR_CTX_ARGS   OpKernelContext* _nvdr_ctx
#define NVDR_CTX_PARAMS _nvdr_ctx
#define NVDR_CHECK(COND, ERR) OP_REQUIRES(_nvdr_ctx, COND, errors::Internal(ERR))
#define NVDR_CHECK_CUDA_ERROR(CUDA_CALL) OP_CHECK_CUDA_ERROR(_nvdr_ctx, CUDA_CALL)
#define NVDR_CHECK_GL_ERROR(GL_CALL) OP_CHECK_GL_ERROR(_nvdr_ctx, GL_CALL)
#endif
30 |
31 | //------------------------------------------------------------------------
32 | // PyTorch.
33 |
34 | #ifdef NVDR_TORCH
35 | #ifndef __CUDACC__
#include <torch/extension.h>
#include <ATen/cuda/CUDAContext.h>
#include <ATen/cuda/CUDAUtils.h>
#include <c10/cuda/CUDAGuard.h>
#include <pybind11/pybind11.h>
41 | #endif
// Torch build: there is no per-op context object, so the context macros are
// dummies and failures raise C++ exceptions via TORCH_CHECK.
#define NVDR_CTX_ARGS int _nvdr_ctx_dummy
#define NVDR_CTX_PARAMS 0
#define NVDR_CHECK(COND, ERR) do { TORCH_CHECK(COND, ERR) } while(0)
#define NVDR_CHECK_CUDA_ERROR(CUDA_CALL) do { cudaError_t err = CUDA_CALL; TORCH_CHECK(!err, "Cuda error: ", cudaGetLastError(), "[", #CUDA_CALL, ";]"); } while(0)
#define NVDR_CHECK_GL_ERROR(GL_CALL) do { GL_CALL; GLenum err = glGetError(); TORCH_CHECK(err == GL_NO_ERROR, "OpenGL error: ", getGLErrorString(err), "[", #GL_CALL, ";]"); } while(0)
#endif
48 |
49 | //------------------------------------------------------------------------
50 |
--------------------------------------------------------------------------------
/nvdiffrast/common/glutil.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #pragma once
10 |
11 | //------------------------------------------------------------------------
12 | // Windows-specific headers and types.
13 | //------------------------------------------------------------------------
14 |
15 | #ifdef _WIN32
16 | #define NOMINMAX
#include <windows.h> // Required by gl.h in Windows.
18 | #define GLAPIENTRY APIENTRY
19 |
// OpenGL context state on Windows: GDI device context plus the WGL
// rendering context created on it.
struct GLContext
{
    HDC     hdc;
    HGLRC   hglrc;
    int     extInitialized;     // Presumably nonzero once extension pointers are loaded -- confirm in glutil.cpp.
};
26 |
27 | #endif // _WIN32
28 |
29 | //------------------------------------------------------------------------
30 | // Linux-specific headers and types.
31 | //------------------------------------------------------------------------
32 |
33 | #ifdef __linux__
34 | #define EGL_NO_X11 // X11/Xlib.h has "#define Status int" which breaks Tensorflow. Avoid it.
35 | #define MESA_EGL_NO_X11_HEADERS
#include <EGL/egl.h>
#include <EGL/eglext.h>
38 | #define GLAPIENTRY
39 |
// OpenGL context state on Linux: headless EGL (no X11, see the EGL_NO_X11
// define above).
struct GLContext
{
    EGLDisplay  display;
    EGLContext  context;
    int         extInitialized;     // Presumably nonzero once extension pointers are loaded -- confirm in glutil.cpp.
};
46 |
47 | #endif // __linux__
48 |
49 | //------------------------------------------------------------------------
50 | // OpenGL, CUDA interop, GL extensions.
51 | //------------------------------------------------------------------------
52 | #define GL_GLEXT_LEGACY
#include <GL/gl.h>
#include <cuda_gl_interop.h>
55 |
// Constants.
// Fallback token definitions for system GL headers that predate the given
// GL version or extension; the #ifndef guards skip each block when the
// header already provides the corresponding version macro.
#ifndef GL_VERSION_1_2
#define GL_CLAMP_TO_EDGE                    0x812F
#define GL_TEXTURE_3D                       0x806F
#endif
#ifndef GL_VERSION_1_5
#define GL_ARRAY_BUFFER                     0x8892
#define GL_DYNAMIC_DRAW                     0x88E8
#define GL_ELEMENT_ARRAY_BUFFER             0x8893
#endif
#ifndef GL_VERSION_2_0
#define GL_FRAGMENT_SHADER                  0x8B30
#define GL_INFO_LOG_LENGTH                  0x8B84
#define GL_LINK_STATUS                      0x8B82
#define GL_VERTEX_SHADER                    0x8B31
#endif
#ifndef GL_VERSION_3_0
#define GL_MAJOR_VERSION                    0x821B
#define GL_MINOR_VERSION                    0x821C
#define GL_RGBA32F                          0x8814
#define GL_TEXTURE_2D_ARRAY                 0x8C1A
#endif
#ifndef GL_VERSION_3_2
#define GL_GEOMETRY_SHADER                  0x8DD9
#endif
#ifndef GL_ARB_framebuffer_object
#define GL_COLOR_ATTACHMENT0                0x8CE0
#define GL_COLOR_ATTACHMENT1                0x8CE1
#define GL_DEPTH_STENCIL                    0x84F9
#define GL_DEPTH_STENCIL_ATTACHMENT         0x821A
#define GL_DEPTH24_STENCIL8                 0x88F0
#define GL_FRAMEBUFFER                      0x8D40
#define GL_INVALID_FRAMEBUFFER_OPERATION    0x0506
#define GL_UNSIGNED_INT_24_8                0x84FA
#endif
#ifndef GL_ARB_imaging
#define GL_TABLE_TOO_LARGE                  0x8031
#endif
#ifndef GL_KHR_robustness
#define GL_CONTEXT_LOST                     0x0507
#endif
97 |
98 | // Declare function pointers to OpenGL extension functions.
99 | #define GLUTIL_EXT(return_type, name, ...) extern return_type (GLAPIENTRY* name)(__VA_ARGS__);
100 | #include "glutil_extlist.h"
101 | #undef GLUTIL_EXT
102 |
103 | //------------------------------------------------------------------------
104 | // Common functions.
105 | //------------------------------------------------------------------------
106 |
// GL context lifecycle helpers; implementations live in glutil.cpp.
void        setGLContext        (GLContext& glctx);
void        releaseGLContext    (void);
GLContext   createGLContext     (int cudaDeviceIdx);
void        destroyGLContext    (GLContext& glctx);
const char* getGLErrorString    (GLenum err);
112 |
113 | //------------------------------------------------------------------------
114 |
--------------------------------------------------------------------------------
/nvdiffrast/common/glutil_extlist.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
// X-macro list of GL extension entry points. This file is included with
// different definitions of GLUTIL_EXT (see glutil.h / glutil.cpp); each
// line declares one function pointer, guarded so that entry points already
// provided by the system headers are skipped.
#ifndef GL_VERSION_1_2
GLUTIL_EXT(void, glTexImage3D, GLenum target, GLint level, GLint internalFormat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLenum format, GLenum type, const void *pixels);
#endif
#ifndef GL_VERSION_1_5
GLUTIL_EXT(void, glBindBuffer, GLenum target, GLuint buffer);
GLUTIL_EXT(void, glBufferData, GLenum target, ptrdiff_t size, const void* data, GLenum usage);
GLUTIL_EXT(void, glGenBuffers, GLsizei n, GLuint* buffers);
#endif
#ifndef GL_VERSION_2_0
GLUTIL_EXT(void, glAttachShader, GLuint program, GLuint shader);
GLUTIL_EXT(void, glCompileShader, GLuint shader);
GLUTIL_EXT(GLuint, glCreateProgram, void);
GLUTIL_EXT(GLuint, glCreateShader, GLenum type);
GLUTIL_EXT(void, glDrawBuffers, GLsizei n, const GLenum* bufs);
GLUTIL_EXT(void, glEnableVertexAttribArray, GLuint index);
GLUTIL_EXT(void, glGetProgramInfoLog, GLuint program, GLsizei bufSize, GLsizei* length, char* infoLog);
GLUTIL_EXT(void, glGetProgramiv, GLuint program, GLenum pname, GLint* param);
GLUTIL_EXT(void, glLinkProgram, GLuint program);
GLUTIL_EXT(void, glShaderSource, GLuint shader, GLsizei count, const char *const* string, const GLint* length);
GLUTIL_EXT(void, glUniform1f, GLint location, GLfloat v0);
GLUTIL_EXT(void, glUniform2f, GLint location, GLfloat v0, GLfloat v1);
GLUTIL_EXT(void, glUseProgram, GLuint program);
GLUTIL_EXT(void, glVertexAttribPointer, GLuint index, GLint size, GLenum type, GLboolean normalized, GLsizei stride, const void* pointer);
#endif
#ifndef GL_VERSION_3_2
GLUTIL_EXT(void, glFramebufferTexture, GLenum target, GLenum attachment, GLuint texture, GLint level);
#endif
#ifndef GL_ARB_framebuffer_object
GLUTIL_EXT(void, glBindFramebuffer, GLenum target, GLuint framebuffer);
GLUTIL_EXT(void, glGenFramebuffers, GLsizei n, GLuint* framebuffers);
#endif
#ifndef GL_ARB_vertex_array_object
GLUTIL_EXT(void, glBindVertexArray, GLuint array);
GLUTIL_EXT(void, glGenVertexArrays, GLsizei n, GLuint* arrays);
#endif
#ifndef GL_ARB_multi_draw_indirect
GLUTIL_EXT(void, glMultiDrawElementsIndirect, GLenum mode, GLenum type, const void *indirect, GLsizei primcount, GLsizei stride);
#endif
47 |
48 | //------------------------------------------------------------------------
49 |
--------------------------------------------------------------------------------
/nvdiffrast/common/interpolate.cu:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #include "common.h"
10 | #include "interpolate.h"
11 |
12 | //------------------------------------------------------------------------
13 | // Forward kernel.
14 |
// Forward interpolation kernel. One thread per output pixel.
// Templated on ENABLE_DA: when true, also computes image-space pixel
// differentials of the selected attributes into p.outDA.
// (Restored the template parameter list, which had been lost; the body and
// the <false>/<true> instantiations below require it.)
template <bool ENABLE_DA>
static __forceinline__ __device__ void InterpolateFwdKernelTemplate(const InterpolateKernelParams p)
{
    // Calculate pixel position.
    int px = blockIdx.x * blockDim.x + threadIdx.x;
    int py = blockIdx.y * blockDim.y + threadIdx.y;
    int pz = blockIdx.z;
    if (px >= p.width || py >= p.height || pz >= p.depth)
        return;

    // Pixel index.
    int pidx = px + p.width * (py + p.height * pz);

    // Output ptrs.
    float* out = p.out + pidx * p.numAttr;
    float2* outDA = ENABLE_DA ? (((float2*)p.outDA) + pidx * p.numDiffAttr) : 0;

    // Fetch rasterizer output. r.w encodes triangle index + 1 (0 = empty pixel).
    float4 r = ((float4*)p.rast)[pidx];
    int triIdx = float_to_triidx(r.w) - 1;
    bool triValid = (triIdx >= 0 && triIdx < p.numTriangles);

    // If no geometry in entire warp, zero the output and exit.
    // Otherwise force barys to zero and output with live threads.
    if (__all_sync(0xffffffffu, !triValid))
    {
        for (int i=0; i < p.numAttr; i++)
            out[i] = 0.f;
        if (ENABLE_DA)
            for (int i=0; i < p.numDiffAttr; i++)
                outDA[i] = make_float2(0.f, 0.f);
        return;
    }

    // Fetch vertex indices. Use index 0 for lanes without a triangle so the
    // loads below stay in bounds; their barys are forced to zero later.
    int vi0 = triValid ? p.tri[triIdx * 3 + 0] : 0;
    int vi1 = triValid ? p.tri[triIdx * 3 + 1] : 0;
    int vi2 = triValid ? p.tri[triIdx * 3 + 2] : 0;

    // Bail out if corrupt indices.
    if (vi0 < 0 || vi0 >= p.numVertices ||
        vi1 < 0 || vi1 >= p.numVertices ||
        vi2 < 0 || vi2 >= p.numVertices)
        return;

    // In instance mode, adjust vertex indices by minibatch index unless broadcasting.
    if (p.instance_mode && !p.attrBC)
    {
        vi0 += pz * p.numVertices;
        vi1 += pz * p.numVertices;
        vi2 += pz * p.numVertices;
    }

    // Pointers to attributes.
    const float* a0 = p.attr + vi0 * p.numAttr;
    const float* a1 = p.attr + vi1 * p.numAttr;
    const float* a2 = p.attr + vi2 * p.numAttr;

    // Barys. If no triangle, force all to zero -> output is zero.
    float b0 = triValid ? r.x : 0.f;
    float b1 = triValid ? r.y : 0.f;
    float b2 = triValid ? (1.f - r.x - r.y) : 0.f;

    // Interpolate and write attributes.
    for (int i=0; i < p.numAttr; i++)
        out[i] = b0*a0[i] + b1*a1[i] + b2*a2[i];

    // No diff attrs? Exit.
    if (!ENABLE_DA)
        return;

    // Read bary pixel differentials if we have a triangle.
    float4 db = make_float4(0.f, 0.f, 0.f, 0.f);
    if (triValid)
        db = ((float4*)p.rastDB)[pidx];

    // Unpack a bit.
    float dudx = db.x;
    float dudy = db.y;
    float dvdx = db.z;
    float dvdy = db.w;

    // Calculate the pixel differentials of chosen attributes via chain rule
    // through the barycentric differentials.
    for (int i=0; i < p.numDiffAttr; i++)
    {
        // Input attribute index.
        int j = p.diff_attrs_all ? i : p.diffAttrs[i];
        if (j < 0)
            j += p.numAttr; // Python-style negative indices.

        // Zero output if invalid index.
        float dsdx = 0.f;
        float dsdy = 0.f;
        if (j >= 0 && j < p.numAttr)
        {
            float s0 = a0[j];
            float s1 = a1[j];
            float s2 = a2[j];
            float dsdu = s0 - s2;
            float dsdv = s1 - s2;
            dsdx = dudx*dsdu + dvdx*dsdv;
            dsdy = dudy*dsdu + dvdy*dsdv;
        }

        // Write.
        outDA[i] = make_float2(dsdx, dsdy);
    }
}
123 |
// Template specializations: without DA (plain interpolation) and with DA
// (attribute pixel differentials). Restored the explicit <false>/<true>
// template arguments, which had been lost.
__global__ void InterpolateFwdKernel  (const InterpolateKernelParams p) { InterpolateFwdKernelTemplate<false>(p); }
__global__ void InterpolateFwdKernelDa(const InterpolateKernelParams p) { InterpolateFwdKernelTemplate<true>(p); }
127 |
128 | //------------------------------------------------------------------------
129 | // Gradient kernel.
130 |
// Gradient (backward) interpolation kernel. One thread per pixel.
// Templated on ENABLE_DA: when true, also propagates gradients arriving
// through the attribute pixel differentials (p.dda) back into attributes
// and barycentric differentials.
// (Restored the template parameter list, which had been lost; the body and
// the <false>/<true> instantiations below require it.)
template <bool ENABLE_DA>
static __forceinline__ __device__ void InterpolateGradKernelTemplate(const InterpolateKernelParams p)
{
    // Temporary space for coalesced atomics.
    CA_DECLARE_TEMP(IP_GRAD_MAX_KERNEL_BLOCK_WIDTH * IP_GRAD_MAX_KERNEL_BLOCK_HEIGHT);

    // Calculate pixel position.
    int px = blockIdx.x * blockDim.x + threadIdx.x;
    int py = blockIdx.y * blockDim.y + threadIdx.y;
    int pz = blockIdx.z;
    if (px >= p.width || py >= p.height || pz >= p.depth)
        return;

    // Pixel index.
    int pidx = px + p.width * (py + p.height * pz);

    // Fetch triangle ID. If none, output zero bary/db gradients and exit.
    float4 r = ((float4*)p.rast)[pidx];
    int triIdx = float_to_triidx(r.w) - 1;
    if (triIdx < 0 || triIdx >= p.numTriangles)
    {
        ((float4*)p.gradRaster)[pidx] = make_float4(0.f, 0.f, 0.f, 0.f);
        if (ENABLE_DA)
            ((float4*)p.gradRasterDB)[pidx] = make_float4(0.f, 0.f, 0.f, 0.f);
        return;
    }

    // Fetch vertex indices.
    int vi0 = p.tri[triIdx * 3 + 0];
    int vi1 = p.tri[triIdx * 3 + 1];
    int vi2 = p.tri[triIdx * 3 + 2];

    // Bail out if corrupt indices.
    if (vi0 < 0 || vi0 >= p.numVertices ||
        vi1 < 0 || vi1 >= p.numVertices ||
        vi2 < 0 || vi2 >= p.numVertices)
        return;

    // In instance mode, adjust vertex indices by minibatch index unless broadcasting.
    if (p.instance_mode && !p.attrBC)
    {
        vi0 += pz * p.numVertices;
        vi1 += pz * p.numVertices;
        vi2 += pz * p.numVertices;
    }

    // Initialize coalesced atomics. Pixels of the same triangle combine
    // their atomic adds within a warp before hitting memory.
    CA_SET_GROUP(triIdx);

    // Pointers to inputs.
    const float* a0 = p.attr + vi0 * p.numAttr;
    const float* a1 = p.attr + vi1 * p.numAttr;
    const float* a2 = p.attr + vi2 * p.numAttr;
    const float* pdy = p.dy + pidx * p.numAttr;

    // Pointers to outputs.
    float* ga0 = p.gradAttr + vi0 * p.numAttr;
    float* ga1 = p.gradAttr + vi1 * p.numAttr;
    float* ga2 = p.gradAttr + vi2 * p.numAttr;

    // Barys and bary gradient accumulators.
    float b0 = r.x;
    float b1 = r.y;
    float b2 = 1.f - r.x - r.y;
    float gb0 = 0.f;
    float gb1 = 0.f;

    // Loop over attributes and accumulate attribute gradients.
    for (int i=0; i < p.numAttr; i++)
    {
        float y = pdy[i];
        float s0 = a0[i];
        float s1 = a1[i];
        float s2 = a2[i];
        gb0 += y * (s0 - s2);
        gb1 += y * (s1 - s2);
        caAtomicAdd(ga0 + i, b0 * y);
        caAtomicAdd(ga1 + i, b1 * y);
        caAtomicAdd(ga2 + i, b2 * y);
    }

    // Write the bary gradients.
    ((float4*)p.gradRaster)[pidx] = make_float4(gb0, gb1, 0.f, 0.f);

    // If pixel differentials disabled, we're done.
    if (!ENABLE_DA)
        return;

    // Calculate gradients based on attribute pixel differentials.
    const float2* dda = ((float2*)p.dda) + pidx * p.numDiffAttr;
    float gdudx = 0.f;
    float gdudy = 0.f;
    float gdvdx = 0.f;
    float gdvdy = 0.f;

    // Read bary pixel differentials.
    float4 db = ((float4*)p.rastDB)[pidx];
    float dudx = db.x;
    float dudy = db.y;
    float dvdx = db.z;
    float dvdy = db.w;

    for (int i=0; i < p.numDiffAttr; i++)
    {
        // Input attribute index.
        int j = p.diff_attrs_all ? i : p.diffAttrs[i];
        if (j < 0)
            j += p.numAttr; // Python-style negative indices.

        // Check that index is valid.
        if (j >= 0 && j < p.numAttr)
        {
            float2 dsdxy = dda[i];
            float dsdx = dsdxy.x;
            float dsdy = dsdxy.y;

            float s0 = a0[j];
            float s1 = a1[j];
            float s2 = a2[j];

            // Gradients of db.
            float dsdu = s0 - s2;
            float dsdv = s1 - s2;
            gdudx += dsdu * dsdx;
            gdudy += dsdu * dsdy;
            gdvdx += dsdv * dsdx;
            gdvdy += dsdv * dsdy;

            // Gradients of attributes.
            float du = dsdx*dudx + dsdy*dudy;
            float dv = dsdx*dvdx + dsdy*dvdy;
            caAtomicAdd(ga0 + j, du);
            caAtomicAdd(ga1 + j, dv);
            caAtomicAdd(ga2 + j, -du - dv);
        }
    }

    // Write.
    ((float4*)p.gradRasterDB)[pidx] = make_float4(gdudx, gdudy, gdvdx, gdvdy);
}
271 |
// Template specializations: without and with attribute pixel differential
// gradients. Restored the explicit <false>/<true> template arguments,
// which had been lost.
__global__ void InterpolateGradKernel  (const InterpolateKernelParams p) { InterpolateGradKernelTemplate<false>(p); }
__global__ void InterpolateGradKernelDa(const InterpolateKernelParams p) { InterpolateGradKernelTemplate<true>(p); }
275 |
276 | //------------------------------------------------------------------------
277 |
--------------------------------------------------------------------------------
/nvdiffrast/common/interpolate.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #pragma once
10 |
11 | //------------------------------------------------------------------------
12 | // Constants and helpers.
13 |
14 | #define IP_FWD_MAX_KERNEL_BLOCK_WIDTH 8
15 | #define IP_FWD_MAX_KERNEL_BLOCK_HEIGHT 8
16 | #define IP_GRAD_MAX_KERNEL_BLOCK_WIDTH 8
17 | #define IP_GRAD_MAX_KERNEL_BLOCK_HEIGHT 8
18 | #define IP_MAX_DIFF_ATTRS 32
19 |
20 | //------------------------------------------------------------------------
21 | // CUDA kernel params.
22 |
// Parameter block shared by the forward and gradient interpolation kernels.
// Forward-only fields (out, outDA) are unused in the backward pass; the
// gradient fields (dy, dda, grad*) are unused in the forward pass.
struct InterpolateKernelParams
{
    const int* tri; // Incoming triangle buffer.
    const float* attr; // Incoming attribute buffer.
    const float* rast; // Incoming rasterizer output buffer.
    const float* rastDB; // Incoming rasterizer output buffer for bary derivatives.
    const float* dy; // Incoming attribute gradients.
    const float* dda; // Incoming attr diff gradients.
    float* out; // Outgoing interpolated attributes.
    float* outDA; // Outgoing attribute pixel differentials, (dsdx, dsdy) per selected attribute.
    float* gradAttr; // Outgoing attribute gradients.
    float* gradRaster; // Outgoing rasterizer gradients.
    float* gradRasterDB; // Outgoing rasterizer bary diff gradients.
    int numTriangles; // Number of triangles.
    int numVertices; // Number of vertices.
    int numAttr; // Number of total vertex attributes.
    int numDiffAttr; // Number of attributes to differentiate.
    int width; // Image width.
    int height; // Image height.
    int depth; // Minibatch size.
    int attrBC; // 0=normal, 1=attr is broadcast across minibatch.
    int instance_mode; // 0=normal, 1=instance mode.
    int diff_attrs_all; // 0=normal, 1=produce pixel differentials for all attributes.
    int diffAttrs[IP_MAX_DIFF_ATTRS]; // List of attributes to differentiate.
};
48 |
49 | //------------------------------------------------------------------------
50 |
--------------------------------------------------------------------------------
/nvdiffrast/common/rasterize.cu:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #include "common.h"
10 | #include "rasterize.h"
11 |
12 | //------------------------------------------------------------------------
13 | // Cuda forward rasterizer pixel shader kernel.
14 |
// CUDA forward rasterizer "pixel shader". For every output pixel, reads the
// triangle index emitted by the rasterizer and computes perspective-correct
// barycentrics (b0, b1), clip-space depth z/w, and the image-space pixel
// differentials of the barycentrics. One thread per output pixel.
__global__ void RasterizeCudaFwdShaderKernel(const RasterizeCudaFwdShaderParams p)
{
    // Calculate pixel position.
    int px = blockIdx.x * blockDim.x + threadIdx.x;
    int py = blockIdx.y * blockDim.y + threadIdx.y;
    int pz = blockIdx.z;
    if (px >= p.width_out || py >= p.height_out || pz >= p.depth)
        return;

    // Pixel indices. Input and output images may have different resolutions.
    int pidx_in = px + p.width_in * (py + p.height_in * pz);
    int pidx_out = px + p.width_out * (py + p.height_out * pz);

    // Fetch triangle idx. Stored indices are offset by +1; zero means empty pixel.
    int triIdx = p.in_idx[pidx_in] - 1;
    if (triIdx < 0 || triIdx >= p.numTriangles)
    {
        // No or corrupt triangle.
        ((float4*)p.out)[pidx_out] = make_float4(0.0, 0.0, 0.0, 0.0); // Clear out.
        ((float4*)p.out_db)[pidx_out] = make_float4(0.0, 0.0, 0.0, 0.0); // Clear out_db. NOTE: written unconditionally - assumed always allocated in this path.
        return;
    }

    // Fetch vertex indices.
    int vi0 = p.tri[triIdx * 3 + 0];
    int vi1 = p.tri[triIdx * 3 + 1];
    int vi2 = p.tri[triIdx * 3 + 2];

    // Bail out if vertex indices are corrupt.
    if (vi0 < 0 || vi0 >= p.numVertices ||
        vi1 < 0 || vi1 >= p.numVertices ||
        vi2 < 0 || vi2 >= p.numVertices)
        return;

    // In instance mode, adjust vertex indices by minibatch index.
    if (p.instance_mode)
    {
        vi0 += pz * p.numVertices;
        vi1 += pz * p.numVertices;
        vi2 += pz * p.numVertices;
    }

    // Fetch vertex positions (clip-space x, y, z, w).
    float4 p0 = ((float4*)p.pos)[vi0];
    float4 p1 = ((float4*)p.pos)[vi1];
    float4 p2 = ((float4*)p.pos)[vi2];

    // Evaluate edge functions. (fx, fy) is the pixel center mapped to
    // clip space via the (xs, xo, ys, yo) transform.
    float fx = p.xs * (float)px + p.xo;
    float fy = p.ys * (float)py + p.yo;
    float p0x = p0.x - fx * p0.w;
    float p0y = p0.y - fy * p0.w;
    float p1x = p1.x - fx * p1.w;
    float p1y = p1.y - fy * p1.w;
    float p2x = p2.x - fx * p2.w;
    float p2y = p2.y - fy * p2.w;
    float a0 = p1x*p2y - p1y*p2x;
    float a1 = p2x*p0y - p2y*p0x;
    float a2 = p0x*p1y - p0y*p1x;

    // Perspective correct, normalized barycentrics.
    float iw = 1.f / (a0 + a1 + a2);
    float b0 = a0 * iw;
    float b1 = a1 * iw;

    // Compute z/w for depth buffer.
    float z = p0.z * a0 + p1.z * a1 + p2.z * a2;
    float w = p0.w * a0 + p1.w * a1 + p2.w * a2;
    float zw = z / w;

    // Clamps to avoid NaNs.
    b0 = __saturatef(b0); // Clamp to [+0.0, 1.0].
    b1 = __saturatef(b1); // Clamp to [+0.0, 1.0].
    zw = fmaxf(fminf(zw, 1.f), -1.f);

    // Emit output. The w component carries the triangle index + 1.
    ((float4*)p.out)[pidx_out] = make_float4(b0, b1, zw, triidx_to_float(triIdx + 1));

    // Calculate bary pixel differentials.
    float dfxdx = p.xs * iw;
    float dfydy = p.ys * iw;
    float da0dx = p2.y*p1.w - p1.y*p2.w;
    float da0dy = p1.x*p2.w - p2.x*p1.w;
    float da1dx = p0.y*p2.w - p2.y*p0.w;
    float da1dy = p2.x*p0.w - p0.x*p2.w;
    float da2dx = p1.y*p0.w - p0.y*p1.w;
    float da2dy = p0.x*p1.w - p1.x*p0.w;
    float datdx = da0dx + da1dx + da2dx;
    float datdy = da0dy + da1dy + da2dy;
    float dudx = dfxdx * (b0 * datdx - da0dx);
    float dudy = dfydy * (b0 * datdy - da0dy);
    float dvdx = dfxdx * (b1 * datdx - da1dx);
    float dvdy = dfydy * (b1 * datdy - da1dy);

    // Emit bary pixel differentials.
    ((float4*)p.out_db)[pidx_out] = make_float4(dudx, dudy, dvdx, dvdy);
}
112 |
113 | //------------------------------------------------------------------------
114 | // Gradient Cuda kernel.
115 |
// Gradient (backward) rasterizer kernel. Propagates gradients arriving at the
// rasterizer output (p.dy) - and, when ENABLE_DB, at the barycentric pixel
// differential output (p.ddb) - back into clip-space vertex positions.
// One thread per pixel; position gradients accumulate via coalesced atomics.
// (Restored the template parameter list, which had been lost; the body and
// the <false>/<true> instantiations below require it.)
template <bool ENABLE_DB>
static __forceinline__ __device__ void RasterizeGradKernelTemplate(const RasterizeGradParams p)
{
    // Temporary space for coalesced atomics.
    CA_DECLARE_TEMP(RAST_GRAD_MAX_KERNEL_BLOCK_WIDTH * RAST_GRAD_MAX_KERNEL_BLOCK_HEIGHT);

    // Calculate pixel position.
    int px = blockIdx.x * blockDim.x + threadIdx.x;
    int py = blockIdx.y * blockDim.y + threadIdx.y;
    int pz = blockIdx.z;
    if (px >= p.width || py >= p.height || pz >= p.depth)
        return;

    // Pixel index.
    int pidx = px + p.width * (py + p.height * pz);

    // Read triangle idx and dy.
    float2 dy = ((float2*)p.dy)[pidx * 2];
    float4 ddb = ENABLE_DB ? ((float4*)p.ddb)[pidx] : make_float4(0.f, 0.f, 0.f, 0.f);
    int triIdx = float_to_triidx(((float*)p.out)[pidx * 4 + 3]) - 1;

    // Exit if nothing to do.
    if (triIdx < 0 || triIdx >= p.numTriangles)
        return; // No or corrupt triangle.
    int grad_all_dy = __float_as_int(dy.x) | __float_as_int(dy.y); // Bitwise OR of all incoming gradients.
    int grad_all_ddb = 0;
    if (ENABLE_DB)
        grad_all_ddb = __float_as_int(ddb.x) | __float_as_int(ddb.y) | __float_as_int(ddb.z) | __float_as_int(ddb.w);
    if (((grad_all_dy | grad_all_ddb) << 1) == 0)
        return; // All incoming gradients are +0/-0. (Shift discards the sign bit.)

    // Fetch vertex indices.
    int vi0 = p.tri[triIdx * 3 + 0];
    int vi1 = p.tri[triIdx * 3 + 1];
    int vi2 = p.tri[triIdx * 3 + 2];

    // Bail out if vertex indices are corrupt.
    if (vi0 < 0 || vi0 >= p.numVertices ||
        vi1 < 0 || vi1 >= p.numVertices ||
        vi2 < 0 || vi2 >= p.numVertices)
        return;

    // In instance mode, adjust vertex indices by minibatch index.
    if (p.instance_mode)
    {
        vi0 += pz * p.numVertices;
        vi1 += pz * p.numVertices;
        vi2 += pz * p.numVertices;
    }

    // Initialize coalesced atomics.
    CA_SET_GROUP(triIdx);

    // Fetch vertex positions.
    float4 p0 = ((float4*)p.pos)[vi0];
    float4 p1 = ((float4*)p.pos)[vi1];
    float4 p2 = ((float4*)p.pos)[vi2];

    // Evaluate edge functions.
    float fx = p.xs * (float)px + p.xo;
    float fy = p.ys * (float)py + p.yo;
    float p0x = p0.x - fx * p0.w;
    float p0y = p0.y - fy * p0.w;
    float p1x = p1.x - fx * p1.w;
    float p1y = p1.y - fy * p1.w;
    float p2x = p2.x - fx * p2.w;
    float p2y = p2.y - fy * p2.w;
    float a0 = p1x*p2y - p1y*p2x;
    float a1 = p2x*p0y - p2y*p0x;
    float a2 = p0x*p1y - p0y*p1x;

    // Compute inverse area with epsilon.
    float at = a0 + a1 + a2;
    float ep = copysignf(1e-6f, at); // ~1 pixel in 1k x 1k image.
    float iw = 1.f / (at + ep);

    // Perspective correct, normalized barycentrics.
    float b0 = a0 * iw;
    float b1 = a1 * iw;

    // Position gradients.
    float gb0 = dy.x * iw;
    float gb1 = dy.y * iw;
    float gbb = gb0 * b0 + gb1 * b1;
    float gp0x = gbb * (p2y - p1y) - gb1 * p2y;
    float gp1x = gbb * (p0y - p2y) + gb0 * p2y;
    float gp2x = gbb * (p1y - p0y) - gb0 * p1y + gb1 * p0y;
    float gp0y = gbb * (p1x - p2x) + gb1 * p2x;
    float gp1y = gbb * (p2x - p0x) - gb0 * p2x;
    float gp2y = gbb * (p0x - p1x) + gb0 * p1x - gb1 * p0x;
    float gp0w = -fx * gp0x - fy * gp0y;
    float gp1w = -fx * gp1x - fy * gp1y;
    float gp2w = -fx * gp2x - fy * gp2y;

    // Bary differential gradients. Skipped when all ddb components are +/-0.
    if (ENABLE_DB && ((grad_all_ddb) << 1) != 0)
    {
        float dfxdX = p.xs * iw;
        float dfydY = p.ys * iw;
        ddb.x *= dfxdX;
        ddb.y *= dfydY;
        ddb.z *= dfxdX;
        ddb.w *= dfydY;

        float da0dX = p1.y * p2.w - p2.y * p1.w;
        float da1dX = p2.y * p0.w - p0.y * p2.w;
        float da2dX = p0.y * p1.w - p1.y * p0.w;
        float da0dY = p2.x * p1.w - p1.x * p2.w;
        float da1dY = p0.x * p2.w - p2.x * p0.w;
        float da2dY = p1.x * p0.w - p0.x * p1.w;
        float datdX = da0dX + da1dX + da2dX;
        float datdY = da0dY + da1dY + da2dY;

        float x01 = p0.x - p1.x;
        float x12 = p1.x - p2.x;
        float x20 = p2.x - p0.x;
        float y01 = p0.y - p1.y;
        float y12 = p1.y - p2.y;
        float y20 = p2.y - p0.y;
        float w01 = p0.w - p1.w;
        float w12 = p1.w - p2.w;
        float w20 = p2.w - p0.w;

        float a0p1 = fy * p2.x - fx * p2.y;
        float a0p2 = fx * p1.y - fy * p1.x;
        float a1p0 = fx * p2.y - fy * p2.x;
        float a1p2 = fy * p0.x - fx * p0.y;

        float wdudX = 2.f * b0 * datdX - da0dX;
        float wdudY = 2.f * b0 * datdY - da0dY;
        float wdvdX = 2.f * b1 * datdX - da1dX;
        float wdvdY = 2.f * b1 * datdY - da1dY;

        float c0 = iw * (ddb.x * wdudX + ddb.y * wdudY + ddb.z * wdvdX + ddb.w * wdvdY);
        float cx = c0 * fx - ddb.x * b0 - ddb.z * b1;
        float cy = c0 * fy - ddb.y * b0 - ddb.w * b1;
        float cxy = iw * (ddb.x * datdX + ddb.y * datdY);
        float czw = iw * (ddb.z * datdX + ddb.w * datdY);

        gp0x += c0 * y12 - cy * w12 + czw * p2y + ddb.w * p2.w;
        gp1x += c0 * y20 - cy * w20 - cxy * p2y - ddb.y * p2.w;
        gp2x += c0 * y01 - cy * w01 + cxy * p1y - czw * p0y + ddb.y * p1.w - ddb.w * p0.w;
        gp0y += cx * w12 - c0 * x12 - czw * p2x - ddb.z * p2.w;
        gp1y += cx * w20 - c0 * x20 + cxy * p2x + ddb.x * p2.w;
        gp2y += cx * w01 - c0 * x01 - cxy * p1x + czw * p0x - ddb.x * p1.w + ddb.z * p0.w;
        gp0w += cy * x12 - cx * y12 - czw * a1p0 + ddb.z * p2.y - ddb.w * p2.x;
        gp1w += cy * x20 - cx * y20 - cxy * a0p1 - ddb.x * p2.y + ddb.y * p2.x;
        gp2w += cy * x01 - cx * y01 - cxy * a0p2 - czw * a1p2 + ddb.x * p1.y - ddb.y * p1.x - ddb.z * p0.y + ddb.w * p0.x;
    }

    // Accumulate using coalesced atomics.
    caAtomicAdd3_xyw(p.grad + 4 * vi0, gp0x, gp0y, gp0w);
    caAtomicAdd3_xyw(p.grad + 4 * vi1, gp1x, gp1y, gp1w);
    caAtomicAdd3_xyw(p.grad + 4 * vi2, gp2x, gp2y, gp2w);
}
271 |
// Template specializations: without and with barycentric differential
// gradients. Restored the explicit <false>/<true> template arguments,
// which had been lost.
__global__ void RasterizeGradKernel  (const RasterizeGradParams p) { RasterizeGradKernelTemplate<false>(p); }
__global__ void RasterizeGradKernelDb(const RasterizeGradParams p) { RasterizeGradKernelTemplate<true>(p); }
275 |
276 | //------------------------------------------------------------------------
277 |
--------------------------------------------------------------------------------
/nvdiffrast/common/rasterize.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #pragma once
10 |
11 | //------------------------------------------------------------------------
12 | // Constants and helpers.
13 |
14 | #define RAST_CUDA_FWD_SHADER_KERNEL_BLOCK_WIDTH 8
15 | #define RAST_CUDA_FWD_SHADER_KERNEL_BLOCK_HEIGHT 8
16 | #define RAST_GRAD_MAX_KERNEL_BLOCK_WIDTH 8
17 | #define RAST_GRAD_MAX_KERNEL_BLOCK_HEIGHT 8
18 |
19 | //------------------------------------------------------------------------
20 | // CUDA forward rasterizer shader kernel params.
21 |
// Parameter block for RasterizeCudaFwdShaderKernel. Input and output image
// dimensions are kept separately because they may differ.
struct RasterizeCudaFwdShaderParams
{
    const float* pos; // Vertex positions (clip-space float4 per vertex).
    const int* tri; // Triangle indices.
    const int* in_idx; // Triangle idx buffer from rasterizer.
    float* out; // Main output buffer.
    float* out_db; // Bary pixel gradient output buffer.
    int numTriangles; // Number of triangles.
    int numVertices; // Number of vertices.
    int width_in; // Input image width.
    int height_in; // Input image height.
    int width_out; // Output image width.
    int height_out; // Output image height.
    int depth; // Size of minibatch.
    int instance_mode; // 1 if in instance rendering mode.
    float xs, xo, ys, yo; // Pixel position to clip-space x, y transform.
};
39 |
40 | //------------------------------------------------------------------------
41 | // Gradient CUDA kernel params.
42 |
// Parameter block for the rasterizer gradient kernels. The ddb field is
// only read by the ENABLE_DB specialization.
struct RasterizeGradParams
{
    const float* pos; // Incoming position buffer.
    const int* tri; // Incoming triangle buffer.
    const float* out; // Rasterizer output buffer.
    const float* dy; // Incoming gradients of rasterizer output buffer.
    const float* ddb; // Incoming gradients of bary diff output buffer.
    float* grad; // Outgoing position gradients.
    int numTriangles; // Number of triangles.
    int numVertices; // Number of vertices.
    int width; // Image width.
    int height; // Image height.
    int depth; // Size of minibatch.
    int instance_mode; // 1 if in instance rendering mode.
    float xs, xo, ys, yo; // Pixel position to clip-space x, y transform.
};
59 |
60 | //------------------------------------------------------------------------
61 |
--------------------------------------------------------------------------------
/nvdiffrast/common/rasterize_gl.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #pragma once
10 |
11 | //------------------------------------------------------------------------
12 | // Do not try to include OpenGL stuff when compiling CUDA kernels for torch.
13 |
14 | #if !(defined(NVDR_TORCH) && defined(__CUDACC__))
15 | #include "framework.h"
16 | #include "glutil.h"
17 |
18 | //------------------------------------------------------------------------
19 | // OpenGL-related persistent state for forward op.
20 |
struct RasterizeGLState // Must be initializable by memset to zero.
{
    int width; // Allocated frame buffer width.
    int height; // Allocated frame buffer height.
    int depth; // Allocated frame buffer depth.
    int posCount; // Allocated position buffer in floats.
    int triCount; // Allocated triangle buffer in ints.
    GLContext glctx; // OpenGL context handle.
    GLuint glFBO; // Frame buffer object.
    GLuint glColorBuffer[2]; // Color render targets.
    GLuint glPrevOutBuffer; // Previous output, used in depth peeling.
    GLuint glDepthStencilBuffer; // Depth/stencil render target.
    GLuint glVAO; // Vertex array object.
    GLuint glTriBuffer; // Triangle index buffer.
    GLuint glPosBuffer; // Vertex position buffer.
    GLuint glProgram; // Main shader program.
    GLuint glProgramDP; // Shader program for depth peeling passes.
    GLuint glVertexShader;
    GLuint glGeometryShader;
    GLuint glFragmentShader;
    GLuint glFragmentShaderDP; // Fragment shader for depth peeling passes.
    cudaGraphicsResource_t cudaColorBuffer[2]; // CUDA interop handles for the color buffers.
    cudaGraphicsResource_t cudaPrevOutBuffer; // CUDA interop handle for previous output buffer.
    cudaGraphicsResource_t cudaPosBuffer; // CUDA interop handle for position buffer.
    cudaGraphicsResource_t cudaTriBuffer; // CUDA interop handle for triangle buffer.
    int enableDB; // 1 if bary differential output is enabled.
    int enableZModify; // Modify depth in shader, workaround for a rasterization issue on A100.
};
49 |
50 | //------------------------------------------------------------------------
51 | // Shared C++ code prototypes.
52 |
53 | void rasterizeInitGLContext(NVDR_CTX_ARGS, RasterizeGLState& s, int cudaDeviceIdx);
54 | void rasterizeResizeBuffers(NVDR_CTX_ARGS, RasterizeGLState& s, bool& changes, int posCount, int triCount, int width, int height, int depth);
55 | void rasterizeRender(NVDR_CTX_ARGS, RasterizeGLState& s, cudaStream_t stream, const float* posPtr, int posCount, int vtxPerInstance, const int32_t* triPtr, int triCount, const int32_t* rangesPtr, int width, int height, int depth, int peeling_idx);
56 | void rasterizeCopyResults(NVDR_CTX_ARGS, RasterizeGLState& s, cudaStream_t stream, float** outputPtr, int width, int height, int depth);
57 | void rasterizeReleaseBuffers(NVDR_CTX_ARGS, RasterizeGLState& s);
58 |
59 | //------------------------------------------------------------------------
60 | #endif // !(defined(NVDR_TORCH) && defined(__CUDACC__))
61 |
--------------------------------------------------------------------------------
/nvdiffrast/common/texture.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #include "framework.h"
10 | #include "texture.h"
11 |
12 | //------------------------------------------------------------------------
13 | // Mip stack construction and access helpers.
14 |
15 | void raiseMipSizeError(NVDR_CTX_ARGS, const TextureKernelParams& p)
16 | {
17 | char buf[1024];
18 | int bufsz = 1024;
19 |
20 | std::string msg = "Mip-map size error - cannot downsample an odd extent greater than 1. Resize the texture so that both spatial extents are powers of two, or limit the number of mip maps using max_mip_level argument.\n";
21 |
22 | int w = p.texWidth;
23 | int h = p.texHeight;
24 | bool ew = false;
25 | bool eh = false;
26 |
27 | msg += "Attempted mip stack construction:\n";
28 | msg += "level width height\n";
29 | msg += "----- ----- ------\n";
30 | snprintf(buf, bufsz, "base %5d %5d\n", w, h);
31 | msg += buf;
32 |
33 | int mipTotal = 0;
34 | int level = 0;
35 | while ((w|h) > 1 && !(ew || eh)) // Stop at first impossible size.
36 | {
37 | // Current level.
38 | level += 1;
39 |
40 | // Determine if downsampling fails.
41 | ew = ew || (w > 1 && (w & 1));
42 | eh = eh || (h > 1 && (h & 1));
43 |
44 | // Downsample.
45 | if (w > 1) w >>= 1;
46 | if (h > 1) h >>= 1;
47 |
48 | // Append level size to error message.
49 | snprintf(buf, bufsz, "mip %-2d ", level);
50 | msg += buf;
51 | if (ew) snprintf(buf, bufsz, " err ");
52 | else snprintf(buf, bufsz, "%5d ", w);
53 | msg += buf;
54 | if (eh) snprintf(buf, bufsz, " err\n");
55 | else snprintf(buf, bufsz, "%5d\n", h);
56 | msg += buf;
57 | }
58 |
59 | NVDR_CHECK(0, msg);
60 | }
61 |
62 | int calculateMipInfo(NVDR_CTX_ARGS, TextureKernelParams& p, int* mipOffsets)
63 | {
64 | // No levels at all?
65 | if (p.mipLevelLimit == 0)
66 | {
67 | p.mipLevelMax = 0;
68 | return 0;
69 | }
70 |
71 | // Current level size.
72 | int w = p.texWidth;
73 | int h = p.texHeight;
74 |
75 | int mipTotal = 0;
76 | int level = 0;
77 | int c = (p.boundaryMode == TEX_BOUNDARY_MODE_CUBE) ? (p.channels * 6) : p.channels;
78 | mipOffsets[0] = 0;
79 | while ((w|h) > 1)
80 | {
81 | // Current level.
82 | level += 1;
83 |
84 | // Quit if cannot downsample.
85 | if ((w > 1 && (w & 1)) || (h > 1 && (h & 1)))
86 | raiseMipSizeError(NVDR_CTX_PARAMS, p);
87 |
88 | // Downsample.
89 | if (w > 1) w >>= 1;
90 | if (h > 1) h >>= 1;
91 |
92 | mipOffsets[level] = mipTotal; // Store the mip offset (#floats).
93 | mipTotal += w * h * p.texDepth * c;
94 |
95 | // Hit the level limit?
96 | if (p.mipLevelLimit >= 0 && level == p.mipLevelLimit)
97 | break;
98 | }
99 |
100 | p.mipLevelMax = level;
101 | return mipTotal;
102 | }
103 |
104 | //------------------------------------------------------------------------
105 |
--------------------------------------------------------------------------------
/nvdiffrast/common/texture.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #pragma once
10 | #include "framework.h"
11 |
12 | //------------------------------------------------------------------------
13 | // Constants.
14 |
15 | #define TEX_DEBUG_MIP_RETAIN_VARIANCE 0 // For debugging
16 | #define TEX_FWD_MAX_KERNEL_BLOCK_WIDTH 8
17 | #define TEX_FWD_MAX_KERNEL_BLOCK_HEIGHT 8
18 | #define TEX_FWD_MAX_MIP_KERNEL_BLOCK_WIDTH 8
19 | #define TEX_FWD_MAX_MIP_KERNEL_BLOCK_HEIGHT 8
20 | #define TEX_GRAD_MAX_KERNEL_BLOCK_WIDTH 8
21 | #define TEX_GRAD_MAX_KERNEL_BLOCK_HEIGHT 8
22 | #define TEX_GRAD_MAX_MIP_KERNEL_BLOCK_WIDTH 8
23 | #define TEX_GRAD_MAX_MIP_KERNEL_BLOCK_HEIGHT 8
24 | #define TEX_MAX_MIP_LEVEL 16 // Currently a texture cannot be larger than 2 GB because we use 32-bit indices everywhere.
25 | #define TEX_MODE_NEAREST 0 // Nearest on base level.
26 | #define TEX_MODE_LINEAR 1 // Bilinear on base level.
27 | #define TEX_MODE_LINEAR_MIPMAP_NEAREST 2 // Bilinear on nearest mip level.
28 | #define TEX_MODE_LINEAR_MIPMAP_LINEAR 3 // Trilinear.
29 | #define TEX_MODE_COUNT 4
30 | #define TEX_BOUNDARY_MODE_CUBE 0 // Cube map mode.
31 | #define TEX_BOUNDARY_MODE_WRAP 1 // Wrap (u, v).
32 | #define TEX_BOUNDARY_MODE_CLAMP 2 // Clamp (u, v).
33 | #define TEX_BOUNDARY_MODE_ZERO 3 // Pad with zeros.
34 | #define TEX_BOUNDARY_MODE_COUNT 4
35 |
36 | //------------------------------------------------------------------------
37 | // CUDA kernel params.
38 |
// Parameter block shared by the texture sampling, gradient, and mip builder
// CUDA kernels. Filled in on the host side before kernel launch.
struct TextureKernelParams
{
    const float* tex[TEX_MAX_MIP_LEVEL]; // Incoming texture buffer with mip levels.
    const float* uv; // Incoming texcoord buffer.
    const float* uvDA; // Incoming uv pixel diffs or NULL.
    const float* mipLevelBias; // Incoming mip level bias or NULL.
    const float* dy; // Incoming output gradient.
    float* out; // Outgoing texture data.
    float* gradTex[TEX_MAX_MIP_LEVEL]; // Outgoing texture gradients with mip levels.
    float* gradUV; // Outgoing texcoord gradient.
    float* gradUVDA; // Outgoing texcoord pixel differential gradient.
    float* gradMipLevelBias; // Outgoing mip level bias gradient.
    int enableMip; // If true, we have uv_da and/or mip_level_bias input(s), and a mip tensor.
    int filterMode; // One of the TEX_MODE_ constants.
    int boundaryMode; // One of the TEX_BOUNDARY_MODE_ constants.
    int texConst; // If true, texture is known to be constant.
    int mipLevelLimit; // Mip level limit coming from the op.
    int channels; // Number of texture channels.
    int imgWidth; // Image width.
    int imgHeight; // Image height.
    int texWidth; // Texture width.
    int texHeight; // Texture height.
    int texDepth; // Texture depth.
    int n; // Minibatch size.
    int mipLevelMax; // Maximum mip level index. Zero if mips disabled.
    int mipLevelOut; // Mip level being calculated in builder kernel.
};
66 |
67 | //------------------------------------------------------------------------
68 | // C++ helper function prototypes.
69 |
70 | void raiseMipSizeError(NVDR_CTX_ARGS, const TextureKernelParams& p);
71 | int calculateMipInfo(NVDR_CTX_ARGS, TextureKernelParams& p, int* mipOffsets);
72 |
73 | //------------------------------------------------------------------------
74 | // Macros.
75 |
76 | #define mipLevelSize(p, i) make_int2(((p).texWidth >> (i)) > 1 ? ((p).texWidth >> (i)) : 1, ((p).texHeight >> (i)) > 1 ? ((p).texHeight >> (i)) : 1)
77 |
78 | //------------------------------------------------------------------------
79 |
--------------------------------------------------------------------------------
/nvdiffrast/lib/setgpu.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/nvdiffrast/lib/setgpu.lib
--------------------------------------------------------------------------------
/nvdiffrast/tensorflow/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | from .ops import rasterize, interpolate, texture, antialias
10 | from .plugin_loader import set_cache_dir
11 |
12 | __all__ = ["rasterize", "interpolate", "texture", "antialias", "set_cache_dir"]
13 |
--------------------------------------------------------------------------------
/nvdiffrast/tensorflow/plugin_loader.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | import glob
10 | import os
11 | import re
12 | import uuid
13 | import hashlib
14 | import tempfile
15 | import shutil
16 | import tensorflow as tf
17 | from tensorflow.python.client import device_lib # pylint: disable=no-name-in-module
18 |
19 | #----------------------------------------------------------------------------
20 | # Global options.
21 |
# Module-level override for the kernel build cache location. None selects
# the environment-based defaults resolved in make_cache_dir_path().
_nvdiffrast_cache_dir = None

def set_cache_dir(path: str) -> None:
    '''Set CUDA kernel compilation temp dir.

    If `set_cache_dir` is not called, the cache directory will default to
    one of the below:

    - Value of NVDIFFRAST_CACHE_DIR env var, if set
    - $HOME/.cache/nvdiffrast if HOME env var is set
    - $USERPROFILE/.cache/nvdiffrast if USERPROFILE is set.

    Args:
        path: Where to save CUDA kernel build temporaries
    '''
    global _nvdiffrast_cache_dir
    _nvdiffrast_cache_dir = path

def make_cache_dir_path(*paths: str) -> str:
    '''Return the kernel build cache directory joined with any subpaths.'''
    # An explicit override set via set_cache_dir() always wins.
    if _nvdiffrast_cache_dir is not None:
        return os.path.join(_nvdiffrast_cache_dir, *paths)
    env = os.environ
    # Environment variable override.
    if 'NVDIFFRAST_CACHE_DIR' in env:
        return os.path.join(env['NVDIFFRAST_CACHE_DIR'], *paths)
    # Per-user defaults: Linux-style HOME first, then Windows USERPROFILE.
    for var in ('HOME', 'USERPROFILE'):
        if var in env:
            return os.path.join(env[var], '.cache', 'nvdiffrast', *paths)
    # Last resort: system temp directory.
    return os.path.join(tempfile.gettempdir(), '.cache', 'nvdiffrast', *paths)
50 |
51 | cuda_cache_version_tag = 'v1'
52 | do_not_hash_included_headers = False # Speed up compilation by assuming that headers included by the CUDA code never change. Unsafe!
53 | verbose = True # Print status messages to stdout.
54 |
55 | #----------------------------------------------------------------------------
56 | # Internal helper funcs.
57 |
def _find_compiler_bindir():
    '''Locate the MSVC Hostx64/x64 compiler binary directory on Windows.

    Searches Visual Studio 2017+ editions (Enterprise, Professional,
    BuildTools, Community) under both Program Files roots, preferring the
    newest version found within each pattern, then falls back to the
    legacy Visual Studio 2015 path. Search order matches the original
    unrolled implementation.

    Returns:
        Path to the compiler bin directory as a string, or None if no
        installation is found (e.g. on Linux).
    '''
    for edition in ('Enterprise', 'Professional', 'BuildTools', 'Community'):
        for root in ('C:/Program Files', 'C:/Program Files (x86)'):
            pattern = root + '/Microsoft Visual Studio/*/' + edition + '/VC/Tools/MSVC/*/bin/Hostx64/x64'
            hostx64_paths = sorted(glob.glob(pattern), reverse=True)
            if hostx64_paths:
                return hostx64_paths[0]
    # Legacy Visual Studio 2015 location.
    vc_bin_dir = 'C:/Program Files (x86)/Microsoft Visual Studio 14.0/vc/bin'
    if os.path.isdir(vc_bin_dir):
        return vc_bin_dir
    return None
87 |
def _get_compute_cap(device):
    '''Parse the CUDA compute capability out of a TF device descriptor.

    Args:
        device: Device info object exposing a `physical_device_desc`
            string, e.g. an entry from `device_lib.list_local_devices()`.

    Returns:
        Tuple `(major, minor)` of compute capability digits as strings.

    Raises:
        RuntimeError: If the descriptor contains no compute capability.
    '''
    caps_str = device.physical_device_desc
    # Raw string and escaped dot: the original pattern's bare '.' matched
    # any character between major and minor.
    m = re.search(r'compute capability: (\d+)\.(\d+)', caps_str)
    if m is None:
        raise RuntimeError('Could not parse compute capability from device description "%s"' % caps_str)
    return (m.group(1), m.group(2))
94 |
def _get_cuda_gpu_arch_string():
    '''Return an nvcc architecture string (e.g. 'sm_75') for the first local GPU.'''
    gpus = [d for d in device_lib.list_local_devices() if d.device_type == 'GPU']
    if not gpus:
        raise RuntimeError('No GPU devices found')
    major, minor = _get_compute_cap(gpus[0])
    return 'sm_%s%s' % (major, minor)
101 |
def _run_cmd(cmd):
    '''Run a shell command, raising RuntimeError with its output on failure.'''
    pipe = os.popen(cmd)
    try:
        output = pipe.read()
    finally:
        status = pipe.close()  # Non-None close() status signals failure.
    if status is not None:
        raise RuntimeError('NVCC returned an error. See below for full command line and output log:\n\n%s\n\n%s' % (cmd, output))
108 |
def _prepare_nvcc_cli(opts):
    '''Build the base nvcc command line: TF include paths, compiler dir, stderr redirect.'''
    tf_inc = tf.sysconfig.get_include()
    include_dirs = [
        tf_inc,
        os.path.join(tf_inc, 'external', 'protobuf_archive', 'src'),
        os.path.join(tf_inc, 'external', 'com_google_absl'),
        os.path.join(tf_inc, 'external', 'eigen_archive'),
    ]

    cmd = 'nvcc ' + opts.strip()
    cmd += ' --disable-warnings'
    for inc_dir in include_dirs:
        cmd += ' --include-path "%s"' % inc_dir

    compiler_bindir = _find_compiler_bindir()
    if compiler_bindir is None:
        # Require that _find_compiler_bindir succeeds on Windows. Allow
        # nvcc to use whatever is the default on Linux.
        if os.name == 'nt':
            raise RuntimeError('Could not find MSVC/GCC/CLANG installation on this computer. Check compiler_bindir_search_path list in "%s".' % __file__)
    else:
        cmd += ' --compiler-bindir "%s"' % compiler_bindir

    cmd += ' 2>&1'
    return cmd
127 |
128 | #----------------------------------------------------------------------------
129 | # Main entry point.
130 |
# In-memory cache of already-loaded plugins, keyed by CUDA source file path.
_plugin_cache = dict()

def get_plugin(cuda_file, extra_nvcc_options=None):
    '''Compile, cache, and load a TensorFlow op plugin from CUDA source.

    The compiled binary is cached on disk, keyed by an MD5 hash covering the
    CUDA source, the headers it includes (unless do_not_hash_included_headers
    is set), the full nvcc command line, and the TensorFlow version, so
    recompilation happens only when something relevant changes. Loaded
    plugins are additionally memoized in memory per source file path.

    Args:
        cuda_file: Path to the .cu source file implementing the ops.
        extra_nvcc_options: Optional list of extra nvcc command-line options.

    Returns:
        The op library module returned by tf.load_op_library().
    '''
    # None sentinel instead of a mutable default argument.
    if extra_nvcc_options is None:
        extra_nvcc_options = []

    cuda_file_base = os.path.basename(cuda_file)
    cuda_file_name, cuda_file_ext = os.path.splitext(cuda_file_base)

    # Already in cache?
    if cuda_file in _plugin_cache:
        return _plugin_cache[cuda_file]

    # Setup plugin.
    if verbose:
        print('Setting up TensorFlow plugin "%s": ' % cuda_file_base, end='', flush=True)
    try:
        # Hash CUDA source.
        md5 = hashlib.md5()
        with open(cuda_file, 'rb') as f:
            md5.update(f.read())
        md5.update(b'\n')

        # Hash headers included by the CUDA code by running it through the preprocessor.
        if not do_not_hash_included_headers:
            if verbose:
                print('Preprocessing... ', end='', flush=True)
            with tempfile.TemporaryDirectory() as tmp_dir:
                tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + cuda_file_ext)
                _run_cmd(_prepare_nvcc_cli('"%s" --preprocess -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir)))
                with open(tmp_file, 'rb') as f:
                    bad_file_str = ('"' + cuda_file.replace('\\', '/') + '"').encode('utf-8') # __FILE__ in error check macros
                    good_file_str = ('"' + cuda_file_base + '"').encode('utf-8')
                    for ln in f:
                        if not ln.startswith(b'# ') and not ln.startswith(b'#line '): # ignore line number pragmas
                            ln = ln.replace(bad_file_str, good_file_str)
                            md5.update(ln)
                    md5.update(b'\n')

        # Select compiler options.
        compile_opts = ''
        if os.name == 'nt':
            compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.lib')
            compile_opts += ' --library-path="%s"' % (os.path.dirname(__file__) + r"\..\lib") # Find libraries during compilation.
        elif os.name == 'posix':
            compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.so')
            compile_opts += ' --compiler-options \'-fPIC -D_GLIBCXX_USE_CXX11_ABI=0\''
        else:
            assert False # not Windows or Linux, w00t?
        compile_opts += ' --gpu-architecture=%s' % _get_cuda_gpu_arch_string()
        compile_opts += ' --use_fast_math'
        for opt in extra_nvcc_options:
            compile_opts += ' ' + opt
        nvcc_cmd = _prepare_nvcc_cli(compile_opts)

        # Hash build configuration.
        md5.update(('nvcc_cmd: ' + nvcc_cmd).encode('utf-8') + b'\n')
        md5.update(('tf.VERSION: ' + tf.VERSION).encode('utf-8') + b'\n')
        md5.update(('cuda_cache_version_tag: ' + cuda_cache_version_tag).encode('utf-8') + b'\n')

        # Compile if not already compiled.
        bin_file_ext = '.dll' if os.name == 'nt' else '.so'
        cuda_cache_path = make_cache_dir_path()
        bin_file = os.path.join(make_cache_dir_path(), cuda_file_name + '_' + md5.hexdigest() + bin_file_ext)
        if not os.path.isfile(bin_file):
            if verbose:
                print('Compiling... ', end='', flush=True)
            with tempfile.TemporaryDirectory() as tmp_dir:
                tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + bin_file_ext)
                _run_cmd(nvcc_cmd + ' "%s" --shared -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir))
                os.makedirs(cuda_cache_path, exist_ok=True)
                # Copy under a unique temp name first, then rename atomically so
                # concurrent processes never observe a partially written binary.
                intermediate_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + uuid.uuid4().hex + '_tmp' + bin_file_ext)
                shutil.copyfile(tmp_file, intermediate_file)
                os.rename(intermediate_file, bin_file) # atomic

        # Load.
        if verbose:
            print('Loading... ', end='', flush=True)
        plugin = tf.load_op_library(bin_file)

        # Add to cache.
        _plugin_cache[cuda_file] = plugin
        if verbose:
            print('Done.', flush=True)
        return plugin

    except:
        if verbose:
            print('Failed!', flush=True)
        raise
218 |
219 | #----------------------------------------------------------------------------
220 |
--------------------------------------------------------------------------------
/nvdiffrast/tensorflow/tf_all.cu:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | // TF-specific helpers.
10 |
11 | #define OP_CHECK_CUDA_ERROR(CTX, CUDA_CALL) do { cudaError_t err = CUDA_CALL; OP_REQUIRES(CTX, err == cudaSuccess, errors::Internal("Cuda error: ", cudaGetErrorName(err), "[", #CUDA_CALL, ";]")); } while (0)
12 | #define OP_CHECK_GL_ERROR(CTX, GL_CALL) do { GL_CALL; GLenum err = glGetError(); OP_REQUIRES(CTX, err == GL_NO_ERROR, errors::Internal("OpenGL error: ", getGLErrorString(err), "[", #GL_CALL, ";]")); } while (0)
13 |
14 | // Cuda kernels and CPP all together. What an absolute compilation unit.
15 |
16 | #define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__
17 | #include "../common/framework.h"
18 | #include "../common/glutil.cpp"
19 |
20 | #include "../common/common.h"
21 | #include "../common/common.cpp"
22 |
23 | #include "../common/rasterize.h"
24 | #include "../common/rasterize_gl.cpp"
25 | #include "../common/rasterize.cu"
26 | #include "tf_rasterize.cu"
27 |
28 | #include "../common/interpolate.cu"
29 | #include "tf_interpolate.cu"
30 |
31 | #include "../common/texture.cpp"
32 | #include "../common/texture.cu"
33 | #include "tf_texture.cu"
34 |
35 | #include "../common/antialias.cu"
36 | #include "tf_antialias.cu"
37 |
--------------------------------------------------------------------------------
/nvdiffrast/torch/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | from .ops import RasterizeCudaContext, RasterizeGLContext, get_log_level, set_log_level, rasterize, DepthPeeler, interpolate, texture, texture_construct_mip, antialias, antialias_construct_topology_hash
10 | __all__ = ["RasterizeCudaContext", "RasterizeGLContext", "get_log_level", "set_log_level", "rasterize", "DepthPeeler", "interpolate", "texture", "texture_construct_mip", "antialias", "antialias_construct_topology_hash"]
11 |
--------------------------------------------------------------------------------
/nvdiffrast/torch/torch_bindings.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #include "torch_common.inl"
10 | #include "torch_types.h"
11 | #include
12 |
13 | //------------------------------------------------------------------------
14 | // Op prototypes. Return type macros for readability.
15 |
16 | #define OP_RETURN_T torch::Tensor
17 | #define OP_RETURN_TT std::tuple
18 | #define OP_RETURN_TTT std::tuple
19 | #define OP_RETURN_TTTT std::tuple
20 | #define OP_RETURN_TTV std::tuple >
21 | #define OP_RETURN_TTTTV std::tuple >
22 |
23 | OP_RETURN_TT rasterize_fwd_cuda (RasterizeCRStateWrapper& stateWrapper, torch::Tensor pos, torch::Tensor tri, std::tuple resolution, torch::Tensor ranges, int peeling_idx);
24 | OP_RETURN_T rasterize_grad (torch::Tensor pos, torch::Tensor tri, torch::Tensor out, torch::Tensor dy);
25 | OP_RETURN_T rasterize_grad_db (torch::Tensor pos, torch::Tensor tri, torch::Tensor out, torch::Tensor dy, torch::Tensor ddb);
26 | OP_RETURN_TT interpolate_fwd (torch::Tensor attr, torch::Tensor rast, torch::Tensor tri);
27 | OP_RETURN_TT interpolate_fwd_da (torch::Tensor attr, torch::Tensor rast, torch::Tensor tri, torch::Tensor rast_db, bool diff_attrs_all, std::vector& diff_attrs_vec);
28 | OP_RETURN_TT interpolate_grad (torch::Tensor attr, torch::Tensor rast, torch::Tensor tri, torch::Tensor dy);
29 | OP_RETURN_TTT interpolate_grad_da (torch::Tensor attr, torch::Tensor rast, torch::Tensor tri, torch::Tensor dy, torch::Tensor rast_db, torch::Tensor dda, bool diff_attrs_all, std::vector& diff_attrs_vec);
30 | TextureMipWrapper texture_construct_mip (torch::Tensor tex, int max_mip_level, bool cube_mode);
31 | OP_RETURN_T texture_fwd (torch::Tensor tex, torch::Tensor uv, int filter_mode, int boundary_mode);
32 | OP_RETURN_T texture_fwd_mip (torch::Tensor tex, torch::Tensor uv, torch::Tensor uv_da, torch::Tensor mip_level_bias, TextureMipWrapper mip_wrapper, std::vector mip_stack, int filter_mode, int boundary_mode);
33 | OP_RETURN_T texture_grad_nearest (torch::Tensor tex, torch::Tensor uv, torch::Tensor dy, int filter_mode, int boundary_mode);
34 | OP_RETURN_TT texture_grad_linear (torch::Tensor tex, torch::Tensor uv, torch::Tensor dy, int filter_mode, int boundary_mode);
35 | OP_RETURN_TTV texture_grad_linear_mipmap_nearest (torch::Tensor tex, torch::Tensor uv, torch::Tensor dy, torch::Tensor uv_da, torch::Tensor mip_level_bias, TextureMipWrapper mip_wrapper, std::vector mip_stack, int filter_mode, int boundary_mode);
36 | OP_RETURN_TTTTV texture_grad_linear_mipmap_linear (torch::Tensor tex, torch::Tensor uv, torch::Tensor dy, torch::Tensor uv_da, torch::Tensor mip_level_bias, TextureMipWrapper mip_wrapper, std::vector mip_stack, int filter_mode, int boundary_mode);
37 | TopologyHashWrapper antialias_construct_topology_hash (torch::Tensor tri);
38 | OP_RETURN_TT antialias_fwd (torch::Tensor color, torch::Tensor rast, torch::Tensor pos, torch::Tensor tri, TopologyHashWrapper topology_hash);
39 | OP_RETURN_TT antialias_grad (torch::Tensor color, torch::Tensor rast, torch::Tensor pos, torch::Tensor tri, torch::Tensor dy, torch::Tensor work_buffer);
40 |
41 | //------------------------------------------------------------------------
42 |
43 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
44 | // State classes.
45 | pybind11::class_(m, "RasterizeCRStateWrapper").def(pybind11::init());
46 | pybind11::class_(m, "TextureMipWrapper").def(pybind11::init<>());
47 | pybind11::class_(m, "TopologyHashWrapper");
48 |
49 | // Plumbing to torch/c10 logging system.
50 | m.def("get_log_level", [](void) { return FLAGS_caffe2_log_level; }, "get log level");
51 | m.def("set_log_level", [](int level){ FLAGS_caffe2_log_level = level; }, "set log level");
52 |
53 | // Ops.
54 | m.def("rasterize_fwd_cuda", &rasterize_fwd_cuda, "rasterize forward op (cuda)");
55 | m.def("rasterize_grad", &rasterize_grad, "rasterize gradient op ignoring db gradients");
56 | m.def("rasterize_grad_db", &rasterize_grad_db, "rasterize gradient op with db gradients");
57 | m.def("interpolate_fwd", &interpolate_fwd, "interpolate forward op with attribute derivatives");
58 | m.def("interpolate_fwd_da", &interpolate_fwd_da, "interpolate forward op without attribute derivatives");
59 | m.def("interpolate_grad", &interpolate_grad, "interpolate gradient op with attribute derivatives");
60 | m.def("interpolate_grad_da", &interpolate_grad_da, "interpolate gradient op without attribute derivatives");
61 | m.def("texture_construct_mip", &texture_construct_mip, "texture mipmap construction");
62 | m.def("texture_fwd", &texture_fwd, "texture forward op without mipmapping");
63 | m.def("texture_fwd_mip", &texture_fwd_mip, "texture forward op with mipmapping");
64 | m.def("texture_grad_nearest", &texture_grad_nearest, "texture gradient op in nearest mode");
65 | m.def("texture_grad_linear", &texture_grad_linear, "texture gradient op in linear mode");
66 | m.def("texture_grad_linear_mipmap_nearest", &texture_grad_linear_mipmap_nearest, "texture gradient op in linear-mipmap-nearest mode");
67 | m.def("texture_grad_linear_mipmap_linear", &texture_grad_linear_mipmap_linear, "texture gradient op in linear-mipmap-linear mode");
68 | m.def("antialias_construct_topology_hash", &antialias_construct_topology_hash, "antialias topology hash construction");
69 | m.def("antialias_fwd", &antialias_fwd, "antialias forward op");
70 | m.def("antialias_grad", &antialias_grad, "antialias gradient op");
71 | }
72 |
73 | //------------------------------------------------------------------------
74 |
--------------------------------------------------------------------------------
/nvdiffrast/torch/torch_bindings_gl.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #include "torch_common.inl"
10 | #include "torch_types.h"
11 | #include
12 |
13 | //------------------------------------------------------------------------
14 | // Op prototypes.
15 |
16 | std::tuple rasterize_fwd_gl(RasterizeGLStateWrapper& stateWrapper, torch::Tensor pos, torch::Tensor tri, std::tuple resolution, torch::Tensor ranges, int peeling_idx);
17 |
18 | //------------------------------------------------------------------------
19 |
// Python module definition for the OpenGL rasterizer extension: exposes the
// GL state wrapper class (with manual context control) and the forward op.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
    // State classes.
    pybind11::class_(m, "RasterizeGLStateWrapper").def(pybind11::init())
        .def("set_context", &RasterizeGLStateWrapper::setContext)
        .def("release_context", &RasterizeGLStateWrapper::releaseContext);

    // Ops.
    m.def("rasterize_fwd_gl", &rasterize_fwd_gl, "rasterize forward op (opengl)");
}
29 |
30 | //------------------------------------------------------------------------
31 |
--------------------------------------------------------------------------------
/nvdiffrast/torch/torch_common.inl:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #pragma once
10 | #include "../common/framework.h"
11 |
12 | //------------------------------------------------------------------------
13 | // Input check helpers.
14 | //------------------------------------------------------------------------
15 |
16 | #ifdef _MSC_VER
17 | #define __func__ __FUNCTION__
18 | #endif
19 |
20 | #define NVDR_CHECK_DEVICE(...) do { TORCH_CHECK(at::cuda::check_device({__VA_ARGS__}), __func__, "(): Inputs " #__VA_ARGS__ " must reside on the same GPU device") } while(0)
21 | #define NVDR_CHECK_CPU(...) do { nvdr_check_cpu({__VA_ARGS__}, __func__, "(): Inputs " #__VA_ARGS__ " must reside on CPU"); } while(0)
22 | #define NVDR_CHECK_CONTIGUOUS(...) do { nvdr_check_contiguous({__VA_ARGS__}, __func__, "(): Inputs " #__VA_ARGS__ " must be contiguous tensors"); } while(0)
23 | #define NVDR_CHECK_F32(...) do { nvdr_check_f32({__VA_ARGS__}, __func__, "(): Inputs " #__VA_ARGS__ " must be float32 tensors"); } while(0)
24 | #define NVDR_CHECK_I32(...) do { nvdr_check_i32({__VA_ARGS__}, __func__, "(): Inputs " #__VA_ARGS__ " must be int32 tensors"); } while(0)
// Helpers backing the NVDR_CHECK_* macros above: each verifies one property
// for every tensor in the list and reports failures via TORCH_CHECK, using
// the calling function's name in the message.
// NOTE(review): template arguments (e.g. the ArrayRef element type) appear
// stripped in this copy — confirm against upstream before compiling.
inline void nvdr_check_cpu(at::ArrayRef ts, const char* func, const char* err_msg) { for (const at::Tensor& t : ts) TORCH_CHECK(t.device().type() == c10::DeviceType::CPU, func, err_msg); }
inline void nvdr_check_contiguous(at::ArrayRef ts, const char* func, const char* err_msg) { for (const at::Tensor& t : ts) TORCH_CHECK(t.is_contiguous(), func, err_msg); }
inline void nvdr_check_f32(at::ArrayRef ts, const char* func, const char* err_msg) { for (const at::Tensor& t : ts) TORCH_CHECK(t.dtype() == torch::kFloat32, func, err_msg); }
inline void nvdr_check_i32(at::ArrayRef ts, const char* func, const char* err_msg) { for (const at::Tensor& t : ts) TORCH_CHECK(t.dtype() == torch::kInt32, func, err_msg); }
29 | //------------------------------------------------------------------------
30 |
--------------------------------------------------------------------------------
/nvdiffrast/torch/torch_rasterize_gl.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #include "torch_common.inl"
10 | #include "torch_types.h"
11 | #include "../common/common.h"
12 | #include "../common/rasterize_gl.h"
13 | #include
14 |
15 | //------------------------------------------------------------------------
16 | // Python GL state wrapper methods.
17 |
// Create a GL rasterizer state wrapper: allocate and zero the state struct,
// record settings, initialize the GL context on the given CUDA device, and
// leave the context unbound from the calling thread until it is needed.
RasterizeGLStateWrapper::RasterizeGLStateWrapper(bool enableDB, bool automatic_, int cudaDeviceIdx_)
{
    pState = new RasterizeGLState();
    automatic = automatic_;       // If true, ops bind/release the GL context themselves.
    cudaDeviceIdx = cudaDeviceIdx_;
    memset(pState, 0, sizeof(RasterizeGLState)); // Zero-initialize all state fields before setup.
    pState->enableDB = enableDB ? 1 : 0;
    rasterizeInitGLContext(NVDR_CTX_PARAMS, *pState, cudaDeviceIdx_);
    releaseGLContext();
}
28 |
// Tear down the wrapper: bind the GL context so buffers can be released,
// then destroy the context and free the state struct.
RasterizeGLStateWrapper::~RasterizeGLStateWrapper(void)
{
    setGLContext(pState->glctx);
    rasterizeReleaseBuffers(NVDR_CTX_PARAMS, *pState);
    releaseGLContext();
    destroyGLContext(pState->glctx);
    delete pState;
}
37 |
// Manually bind this wrapper's GL context to the calling thread
// (exposed to Python as set_context for non-automatic mode).
void RasterizeGLStateWrapper::setContext(void)
{
    setGLContext(pState->glctx);
}
42 |
// Manually release the GL context from the calling thread
// (exposed to Python as release_context for non-automatic mode).
void RasterizeGLStateWrapper::releaseContext(void)
{
    releaseGLContext();
}
47 |
48 | //------------------------------------------------------------------------
49 | // Forward op (OpenGL).
50 |
51 | std::tuple rasterize_fwd_gl(RasterizeGLStateWrapper& stateWrapper, torch::Tensor pos, torch::Tensor tri, std::tuple resolution, torch::Tensor ranges, int peeling_idx)
52 | {
53 | const at::cuda::OptionalCUDAGuard device_guard(device_of(pos));
54 | cudaStream_t stream = at::cuda::getCurrentCUDAStream();
55 | RasterizeGLState& s = *stateWrapper.pState;
56 |
57 | // Check inputs.
58 | NVDR_CHECK_DEVICE(pos, tri);
59 | NVDR_CHECK_CPU(ranges);
60 | NVDR_CHECK_CONTIGUOUS(pos, tri, ranges);
61 | NVDR_CHECK_F32(pos);
62 | NVDR_CHECK_I32(tri, ranges);
63 |
64 | // Check that GL context was created for the correct GPU.
65 | NVDR_CHECK(pos.get_device() == stateWrapper.cudaDeviceIdx, "GL context must must reside on the same device as input tensors");
66 |
67 | // Determine number of outputs
68 | int num_outputs = s.enableDB ? 2 : 1;
69 |
70 | // Determine instance mode and check input dimensions.
71 | bool instance_mode = pos.sizes().size() > 2;
72 | if (instance_mode)
73 | NVDR_CHECK(pos.sizes().size() == 3 && pos.size(0) > 0 && pos.size(1) > 0 && pos.size(2) == 4, "instance mode - pos must have shape [>0, >0, 4]");
74 | else
75 | {
76 | NVDR_CHECK(pos.sizes().size() == 2 && pos.size(0) > 0 && pos.size(1) == 4, "range mode - pos must have shape [>0, 4]");
77 | NVDR_CHECK(ranges.sizes().size() == 2 && ranges.size(0) > 0 && ranges.size(1) == 2, "range mode - ranges must have shape [>0, 2]");
78 | }
79 | NVDR_CHECK(tri.sizes().size() == 2 && tri.size(0) > 0 && tri.size(1) == 3, "tri must have shape [>0, 3]");
80 |
81 | // Get output shape.
82 | int height = std::get<0>(resolution);
83 | int width = std::get<1>(resolution);
84 | int depth = instance_mode ? pos.size(0) : ranges.size(0);
85 | NVDR_CHECK(height > 0 && width > 0, "resolution must be [>0, >0]");
86 |
87 | // Get position and triangle buffer sizes in int32/float32.
88 | int posCount = 4 * pos.size(0) * (instance_mode ? pos.size(1) : 1);
89 | int triCount = 3 * tri.size(0);
90 |
91 | // Set the GL context unless manual context.
92 | if (stateWrapper.automatic)
93 | setGLContext(s.glctx);
94 |
95 | // Resize all buffers.
96 | bool changes = false;
97 | rasterizeResizeBuffers(NVDR_CTX_PARAMS, s, changes, posCount, triCount, width, height, depth);
98 | if (changes)
99 | {
100 | #ifdef _WIN32
101 | // Workaround for occasional blank first frame on Windows.
102 | releaseGLContext();
103 | setGLContext(s.glctx);
104 | #endif
105 | }
106 |
107 | // Copy input data to GL and render.
108 | const float* posPtr = pos.data_ptr();
109 | const int32_t* rangesPtr = instance_mode ? 0 : ranges.data_ptr(); // This is in CPU memory.
110 | const int32_t* triPtr = tri.data_ptr();
111 | int vtxPerInstance = instance_mode ? pos.size(1) : 0;
112 | rasterizeRender(NVDR_CTX_PARAMS, s, stream, posPtr, posCount, vtxPerInstance, triPtr, triCount, rangesPtr, width, height, depth, peeling_idx);
113 |
114 | // Allocate output tensors.
115 | torch::TensorOptions opts = torch::TensorOptions().dtype(torch::kFloat32).device(torch::kCUDA);
116 | torch::Tensor out = torch::empty({depth, height, width, 4}, opts);
117 | torch::Tensor out_db = torch::empty({depth, height, width, s.enableDB ? 4 : 0}, opts);
118 | float* outputPtr[2];
119 | outputPtr[0] = out.data_ptr();
120 | outputPtr[1] = s.enableDB ? out_db.data_ptr() : NULL;
121 |
122 | // Copy rasterized results into CUDA buffers.
123 | rasterizeCopyResults(NVDR_CTX_PARAMS, s, stream, outputPtr, width, height, depth);
124 |
125 | // Done. Release GL context and return.
126 | if (stateWrapper.automatic)
127 | releaseGLContext();
128 |
129 | return std::tuple(out, out_db);
130 | }
131 |
132 | //------------------------------------------------------------------------
133 |
--------------------------------------------------------------------------------
/nvdiffrast/torch/torch_types.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #include "torch_common.inl"
10 |
11 | //------------------------------------------------------------------------
12 | // Python GL state wrapper.
13 |
// Forward declaration; full definition lives in the common rasterizer sources (rasterize_gl.h).
class RasterizeGLState;

// Opaque wrapper exposing a persistent OpenGL rasterizer state to Python.
// The torch ops receive this object and use pState for all GL work.
class RasterizeGLStateWrapper
{
public:
    // enableDB:      also allocate/produce the second (derivative) output buffer.
    // automatic:     if true, ops set and release the GL context around each call;
    //                if false, the caller drives setContext()/releaseContext() manually.
    // cudaDeviceIdx: CUDA device the GL context is created for; inputs must reside there.
    RasterizeGLStateWrapper     (bool enableDB, bool automatic, int cudaDeviceIdx);
    ~RasterizeGLStateWrapper    (void);

    // Manual context control, used when automatic == false.
    void setContext             (void);
    void releaseContext         (void);

    RasterizeGLState*           pState;         // Internal GL state (opaque to Python).
    bool                        automatic;      // Automatic context set/release mode.
    int                         cudaDeviceIdx;  // Device the context was created on.
};
28 |
29 | //------------------------------------------------------------------------
30 | // Python CudaRaster state wrapper.
31 |
// Forward declaration of the CUDA rasterizer (see common/cudaraster/CudaRaster.hpp).
namespace CR { class CudaRaster; }

// Opaque wrapper exposing a persistent CudaRaster instance to Python.
class RasterizeCRStateWrapper
{
public:
    // cudaDeviceIdx: CUDA device the rasterizer operates on.
    RasterizeCRStateWrapper     (int cudaDeviceIdx);
    ~RasterizeCRStateWrapper    (void);

    CR::CudaRaster*             cr;             // Underlying rasterizer object (opaque to Python).
    int                         cudaDeviceIdx;  // Device index the wrapper was created for.
};
42 |
43 | //------------------------------------------------------------------------
44 | // Mipmap wrapper to prevent intrusion from Python side.
45 |
46 | class TextureMipWrapper
47 | {
48 | public:
49 | torch::Tensor mip;
50 | int max_mip_level;
51 | std::vector texture_size; // For error checking.
52 | bool cube_mode; // For error checking.
53 | };
54 |
55 |
56 | //------------------------------------------------------------------------
57 | // Antialias topology hash wrapper to prevent intrusion from Python side.
58 |
// Opaque holder for the antialias op's precomputed topology hash so Python
// code cannot tamper with it between calls.
class TopologyHashWrapper
{
public:
    torch::Tensor               ev_hash;        // Edge-vertex hash tensor used by the antialias op.
};
64 |
65 | //------------------------------------------------------------------------
66 |
--------------------------------------------------------------------------------
/run_sample.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.

# Print usage. Fix: the sample-script argument is mandatory (the script exits
# if it is missing), so it now appears in the usage line.
function print_help {
    echo "Usage: `basename $0` [--build-container] <python_sample.py> [sample arguments]"
    echo ""
    echo "Option --build-container will build the Docker container based on"
    echo "docker/Dockerfile and tag the image with gltorch:latest."
    echo ""
    echo "Example: `basename $0` samples/torch/envphong.py"
}

# Parse arguments. Parsing stops at the first non-option argument (the sample
# script) so everything after it is forwarded to the sample untouched.
build_container=0
sample=""
while [[ "$#" -gt 0 ]]; do
    case $1 in
        --build-container) build_container=1;;
        -h|--help) print_help; exit 0 ;;
        --*) echo "Unknown parameter passed: $1"; exit 1 ;;
        *) sample="$1"; shift; break;;
    esac
    shift
done

# Remaining arguments are forwarded to the sample script.
rest=$@

# Build the docker container if requested.
if [ "$build_container" = "1" ]; then
    docker build --tag gltorch:latest -f docker/Dockerfile .
fi

# A readable sample script is required.
if [ ! -f "$sample" ]; then
    echo
    echo "No python sample given or file '$sample' not found. Exiting."
    exit 1
fi

image="gltorch:latest"

echo "Using container image: $image"
echo "Running command: $sample $rest"

# Run the sample inside the container. The repo is mounted at /app and torch
# extensions are built into /app/tmp so they persist between runs.
docker run --rm -it --gpus all --user $(id -u):$(id -g) \
    -v `pwd`:/app --workdir /app -e TORCH_EXTENSIONS_DIR=/app/tmp $image python3 $sample $rest
--------------------------------------------------------------------------------
/samples/data/cube_c.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/samples/data/cube_c.npz
--------------------------------------------------------------------------------
/samples/data/cube_d.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/samples/data/cube_d.npz
--------------------------------------------------------------------------------
/samples/data/cube_p.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/samples/data/cube_p.npz
--------------------------------------------------------------------------------
/samples/data/earth.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/samples/data/earth.npz
--------------------------------------------------------------------------------
/samples/data/envphong.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/nvdiffrast/729261dc64c4241ea36efda84fbf532cc8b425b8/samples/data/envphong.npz
--------------------------------------------------------------------------------
/samples/tensorflow/cube.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | import numpy as np
10 | import os
11 | import sys
12 | import pathlib
13 |
14 | import util
15 | import tensorflow as tf
16 |
17 | sys.path.insert(0, os.path.join(sys.path[0], '../..')) # for nvdiffrast
18 | import nvdiffrast.tensorflow as dr
19 |
20 | #----------------------------------------------------------------------------
21 | # Cube shape fitter.
22 | #----------------------------------------------------------------------------
23 |
def fit_cube(max_iter          = 5000,
             resolution        = 4,
             discontinuous     = False,
             repeats           = 1,
             log_interval      = 10,
             display_interval  = None,
             display_res       = 512,
             out_dir           = '.',
             log_fn            = None,
             imgsave_interval  = None,
             imgsave_fn        = None):
    """Fit vertex positions and colors of a cube to reference renderings.

    Builds a TF1 graph that renders a reference cube with known geometry and
    colors, then optimizes a randomly perturbed copy to match it using an
    image-space L2 loss under random viewpoints.

    Args:
        max_iter:          Optimization iterations per repeat.
        resolution:        Rasterization resolution used for the training loss.
        discontinuous:     If True, use the cube variant with discontinuous vertex colors.
        repeats:           Number of independent optimization runs.
        log_interval:      Iterations between log prints (0/None disables).
        display_interval:  Iterations between interactive displays (0/None disables).
        display_res:       Resolution of displayed/saved images.
        out_dir:           Output directory for logs and images.
        log_fn:            Log file name inside out_dir (None disables file logging).
        imgsave_interval:  Iterations between image saves (0/None disables).
        imgsave_fn:        Image file name pattern with one %d for the iteration.
    """

    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Load cube mesh: 'c' = continuous colors, 'd' = discontinuous colors.
    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    fn = 'cube_%s.npz' % ('d' if discontinuous else 'c')
    with np.load(f'{datadir}/{fn}') as f:
        pos_idx, vtxp, col_idx, vtxc = f.values()
    print("Mesh has %d triangles and %d vertices." % (pos_idx.shape[0], vtxp.shape[0]))

    # Transformation matrix input to TF graph.
    mtx_in = tf.placeholder(tf.float32, [4, 4])

    # Setup TF graph for reference rendering (ground-truth geometry/colors).
    vtxw = np.concatenate([vtxp, np.ones([vtxp.shape[0], 1])], axis=1).astype(np.float32)
    pos_clip = tf.matmul(vtxw, mtx_in, transpose_b=True)[tf.newaxis, ...]
    rast_out, _ = dr.rasterize(pos_clip, pos_idx, resolution=[resolution, resolution], output_db=False)
    color, _ = dr.interpolate(vtxc[tf.newaxis, ...], rast_out, col_idx)
    color = dr.antialias(color, rast_out, pos_clip, pos_idx)

    # Optimized variables.
    vtxc_opt = tf.get_variable('vtxc', initializer=tf.zeros_initializer(), shape=vtxc.shape)
    vtxp_opt = tf.get_variable('vtxp', initializer=tf.zeros_initializer(), shape=vtxp.shape)

    # Optimization variable setters for initialization.
    vtxc_opt_in = tf.placeholder(tf.float32, vtxc.shape)
    vtxp_opt_in = tf.placeholder(tf.float32, vtxp.shape)
    opt_set = tf.group(tf.assign(vtxc_opt, vtxc_opt_in), tf.assign(vtxp_opt, vtxp_opt_in))

    # Setup TF graph for the optimized (candidate) rendering result.
    vtxw_opt = tf.concat([vtxp_opt, tf.ones([vtxp.shape[0], 1], tf.float32)], axis=1)
    pos_clip_opt = tf.matmul(vtxw_opt, mtx_in, transpose_b=True)[tf.newaxis, ...]
    rast_out_opt, _ = dr.rasterize(pos_clip_opt, pos_idx, resolution=[resolution, resolution], output_db=False)
    color_opt, _ = dr.interpolate(vtxc_opt[tf.newaxis, ...], rast_out_opt, col_idx)
    color_opt = dr.antialias(color_opt, rast_out_opt, pos_clip_opt, pos_idx)

    # Image-space loss and optimizer.
    loss = tf.reduce_mean((color_opt - color)**2)
    lr_in = tf.placeholder(tf.float32, [])
    train_op = tf.train.AdamOptimizer(lr_in, 0.9, 0.999).minimize(loss, var_list=[vtxp_opt, vtxc_opt])

    # Setup TF graph for high-resolution display (candidate and reference).
    rast_out_disp, _ = dr.rasterize(pos_clip_opt, pos_idx, resolution=[display_res, display_res], output_db=False)
    color_disp, _ = dr.interpolate(vtxc_opt[tf.newaxis, ...], rast_out_disp, col_idx)
    color_disp = dr.antialias(color_disp, rast_out_disp, pos_clip_opt, pos_idx)
    rast_out_disp_ref, _ = dr.rasterize(pos_clip, pos_idx, resolution=[display_res, display_res], output_db=False)
    color_disp_ref, _ = dr.interpolate(vtxc[tf.newaxis, ...], rast_out_disp_ref, col_idx)
    color_disp_ref = dr.antialias(color_disp_ref, rast_out_disp_ref, pos_clip, pos_idx)

    # Geometric error: mean per-vertex L2 deviation of abs(coordinates) from 0.5
    # (the ground-truth cube vertices lie at +-0.5 on every axis).
    geom_loss = tf.reduce_mean(tf.reduce_sum((tf.abs(vtxp_opt) - .5)**2, axis=1)**0.5)

    # Open log file.
    log_file = open(out_dir + '/' + log_fn, 'wt') if log_fn else None

    # Repeats.
    for rep in range(repeats):

        # Optimize.
        ang = 0.0
        gl_avg = []
        util.init_uninitialized_vars()
        for it in range(max_iter + 1):
            # Initialize optimization: random perturbation of positions, random colors.
            if it == 0:
                vtxp_init = np.random.uniform(-0.5, 0.5, size=vtxp.shape) + vtxp
                vtxc_init = np.random.uniform(0.0, 1.0, size=vtxc.shape)
                util.run(opt_set, {vtxc_opt_in: vtxc_init.astype(np.float32), vtxp_opt_in: vtxp_init.astype(np.float32)})

            # Learning rate ramp: exponential decay, floored at 1% of base.
            lr = 1e-2
            lr = lr * max(0.01, 10**(-it*0.0005))

            # Random rotation/translation matrix for optimization.
            r_rot = util.random_rotation_translation(0.25)

            # Smooth rotation for display.
            a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))

            # Modelview and modelview + projection matrices.
            proj = util.projection(x=0.4)
            r_mv = np.matmul(util.translate(0, 0, -3.5), r_rot)
            r_mvp = np.matmul(proj, r_mv).astype(np.float32)
            a_mv = np.matmul(util.translate(0, 0, -3.5), a_rot)
            a_mvp = np.matmul(proj, a_mv).astype(np.float32)

            # Run training and measure geometric error.
            gl_val, _ = util.run([geom_loss, train_op], {mtx_in: r_mvp, lr_in: lr})
            gl_avg.append(gl_val)

            # Print/save log. Error values are averaged over the log interval.
            if log_interval and (it % log_interval == 0):
                gl_val, gl_avg = np.mean(np.asarray(gl_avg)), []
                s = ("rep=%d," % rep) if repeats > 1 else ""
                s += "iter=%d,err=%f" % (it, gl_val)
                print(s)
                if log_file:
                    log_file.write(s + "\n")

            # Show/save image.
            display_image = display_interval and (it % display_interval == 0)
            save_image = imgsave_interval and (it % imgsave_interval == 0)

            if display_image or save_image:
                ang = ang + 0.1
                img_o = util.run(color_opt, {mtx_in: r_mvp})[0]
                img_b = util.run(color, {mtx_in: r_mvp})[0]
                img_d = util.run(color_disp, {mtx_in: a_mvp})[0]
                img_r = util.run(color_disp_ref, {mtx_in: a_mvp})[0]

                # Nearest-neighbor upscale low-resolution images to display size,
                # then show candidate | reference | display candidate | display reference.
                scl = display_res // img_o.shape[0]
                img_b = np.repeat(np.repeat(img_b, scl, axis=0), scl, axis=1)
                img_o = np.repeat(np.repeat(img_o, scl, axis=0), scl, axis=1)
                result_image = np.concatenate([img_o, img_b, img_d, img_r], axis=1)

                if display_image:
                    util.display_image(result_image, size=display_res, title='%d / %d' % (it, max_iter))
                if save_image:
                    util.save_image(out_dir + '/' + (imgsave_fn % it), result_image)

    # All repeats done.
    if log_file:
        log_file.close()
158 |
159 | #----------------------------------------------------------------------------
160 | # Main function.
161 | #----------------------------------------------------------------------------
162 |
def main():
    """Command-line entry point: parse flags, then run the cube fitting example."""
    display_interval = 0
    discontinuous = False
    resolution = 0

    def usage():
        # Print help text and terminate the process.
        print("Usage: python cube.py [-v] [-discontinuous] resolution")
        exit()

    # Consume arguments one at a time; anything unrecognized triggers usage().
    args = list(sys.argv[1:])
    while args:
        arg = args.pop(0)
        if arg == '-v':
            display_interval = 100
            continue
        if arg == '-discontinuous':
            discontinuous = True
            continue
        if not arg.isdecimal():
            usage()
        resolution = int(arg)

    # A positive resolution is mandatory.
    if resolution <= 0:
        usage()

    # Initialize TensorFlow.
    util.init_tf()

    # Run.
    out_dir = 'out/cube_%s_%d' % (('d' if discontinuous else 'c'), resolution)
    fit_cube(max_iter=5000, resolution=resolution, discontinuous=discontinuous, log_interval=10, display_interval=display_interval, out_dir=out_dir, log_fn='log.txt', imgsave_interval=1000, imgsave_fn='img_%06d.png')

    # Done.
    print("Done.")
194 |
195 | #----------------------------------------------------------------------------
196 |
197 | if __name__ == "__main__":
198 | main()
199 |
200 | #----------------------------------------------------------------------------
201 |
--------------------------------------------------------------------------------
/samples/tensorflow/earth.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | import numpy as np
10 | import tensorflow as tf
11 | import os
12 | import sys
13 | import pathlib
14 |
15 | import util
16 |
17 | sys.path.insert(0, os.path.join(sys.path[0], '../..')) # for nvdiffrast
18 | import nvdiffrast.tensorflow as dr
19 |
20 | #----------------------------------------------------------------------------
21 | # Texture learning with/without mipmaps.
22 | #----------------------------------------------------------------------------
23 |
def fit_earth(max_iter          = 20000,
              log_interval      = 10,
              display_interval  = None,
              display_res       = 1024,
              enable_mip        = True,
              res               = 512,
              ref_res           = 4096,
              lr_base           = 1e-2,
              lr_ramp           = 0.1,
              out_dir           = '.',
              log_fn            = None,
              texsave_interval  = None,
              texsave_fn        = None,
              imgsave_interval  = None,
              imgsave_fn        = None):
    """Learn the earth texture from reference renderings, with or without mipmaps.

    The reference is rendered at high resolution with full mipmapping and
    bilinearly downsampled to the training resolution; the candidate is rendered
    directly at training resolution, with mipmaps (enable_mip=True) or plain
    bilinear filtering. The texture variable is optimized against an image-space
    L2 loss, while progress is reported as RMSE/PSNR against the known texture.

    Args:
        max_iter:          Number of optimization iterations.
        log_interval:      Iterations between log prints (0/None disables).
        display_interval:  Iterations between interactive displays (0/None disables).
        display_res:       Display window size.
        enable_mip:        If True, candidate rendering uses trilinear mipmapping.
        res:               Training rendering resolution.
        ref_res:           Resolution of the supersampled reference rendering.
        lr_base:           Initial learning rate.
        lr_ramp:           Total multiplicative learning rate decay over the run.
        out_dir:           Output directory for logs, images and texture snapshots.
        log_fn:            Log file name inside out_dir (None disables file logging).
        texsave_interval:  Iterations between texture snapshots (0/None disables).
        texsave_fn:        Texture file name pattern with one %d for the iteration.
        imgsave_interval:  Iterations between image saves (0/None disables).
        imgsave_fn:        Image file name pattern with one %d for the iteration.
    """

    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Mesh and texture adapted from "3D Earth Photorealistic 2K" model at
    # https://www.turbosquid.com/3d-models/3d-realistic-earth-photorealistic-2k-1279125
    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    with np.load(f'{datadir}/earth.npz') as f:
        pos_idx, pos, uv_idx, uv, tex = f.values()
    tex = tex.astype(np.float32)/255.0
    max_mip_level = 9 # Texture is a 4x3 atlas of 512x512 maps.
    print("Mesh has %d triangles and %d vertices." % (pos_idx.shape[0], pos.shape[0]))

    # Transformation matrix input to TF graph.
    mtx_in = tf.placeholder(tf.float32, [4, 4])

    # Learned texture, initialized to constant 0.2 gray.
    tex_var = tf.get_variable('tex', initializer=tf.constant_initializer(0.2), shape=tex.shape)

    # Setup TF graph for reference rendering in high resolution.
    pos_clip = tf.matmul(pos, mtx_in, transpose_b=True)[tf.newaxis, ...]
    rast_out, rast_out_db = dr.rasterize(pos_clip, pos_idx, [ref_res, ref_res])
    texc, texd = dr.interpolate(uv[tf.newaxis, ...], rast_out, uv_idx, rast_db=rast_out_db, diff_attrs='all')
    color = dr.texture(tex[np.newaxis], texc, texd, filter_mode='linear-mipmap-linear', max_mip_level=max_mip_level)
    color = color * tf.clip_by_value(rast_out[..., -1:], 0, 1) # Mask out background.

    # Reduce the reference to correct size by repeated 2x bilinear downsampling.
    while color.shape[1] > res:
        color = util.bilinear_downsample(color)

    # TF Graph for rendered candidate.
    if enable_mip:
        # With mipmaps.
        rast_out_opt, rast_out_db_opt = dr.rasterize(pos_clip, pos_idx, [res, res])
        texc_opt, texd_opt = dr.interpolate(uv[tf.newaxis, ...], rast_out_opt, uv_idx, rast_db=rast_out_db_opt, diff_attrs='all')
        color_opt = dr.texture(tex_var[np.newaxis], texc_opt, texd_opt, filter_mode='linear-mipmap-linear', max_mip_level=max_mip_level)
    else:
        # No mipmaps: no image-space derivatives anywhere.
        rast_out_opt, _ = dr.rasterize(pos_clip, pos_idx, [res, res], output_db=False)
        texc_opt, _ = dr.interpolate(uv[tf.newaxis, ...], rast_out_opt, uv_idx)
        color_opt = dr.texture(tex_var[np.newaxis], texc_opt, filter_mode='linear')
    color_opt = color_opt * tf.clip_by_value(rast_out_opt[..., -1:], 0, 1) # Mask out background.

    # Measure only relevant portions of texture when calculating texture PSNR.
    loss = tf.reduce_mean((color - color_opt)**2)
    texmask = np.zeros_like(tex)
    tr = tex.shape[1]//4
    texmask[tr+13:2*tr-13, 25:-25, :] += 1.0
    texmask[25:-25, tr+13:2*tr-13, :] += 1.0
    texloss = (tf.reduce_sum(texmask * (tex - tex_var)**2)/np.sum(texmask))**0.5 # RMSE within masked area.

    # Training driven by image-space loss.
    lr_in = tf.placeholder(tf.float32, [])
    train_op = tf.train.AdamOptimizer(lr_in, 0.9, 0.99).minimize(loss, var_list=[tex_var])

    # Open log file.
    log_file = open(out_dir + '/' + log_fn, 'wt') if log_fn else None

    # Render.
    ang = 0.0
    util.init_uninitialized_vars()
    texloss_avg = []
    for it in range(max_iter + 1):
        # Learning rate ramp: decays from lr_base to lr_base*lr_ramp over the run.
        lr = lr_base * lr_ramp**(float(it)/float(max_iter))

        # Random rotation/translation matrix for optimization.
        r_rot = util.random_rotation_translation(0.25)

        # Smooth rotation for display.
        ang = ang + 0.01
        a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))
        dist = np.random.uniform(0.0, 48.5) # Random camera distance — presumably to exercise a range of mip levels; TODO confirm.

        # Modelview and modelview + projection matrices.
        proj = util.projection(x=0.4, n=1.0, f=200.0)
        r_mv = np.matmul(util.translate(0, 0, -1.5-dist), r_rot)
        r_mvp = np.matmul(proj, r_mv).astype(np.float32)
        a_mv = np.matmul(util.translate(0, 0, -3.5), a_rot)
        a_mvp = np.matmul(proj, a_mv).astype(np.float32)

        # Run training and measure texture-space RMSE loss.
        texloss_val, _ = util.run([texloss, train_op], {mtx_in: r_mvp, lr_in: lr})
        texloss_avg.append(texloss_val)

        # Print/save log. RMSE values are averaged over the log interval.
        if log_interval and (it % log_interval == 0):
            texloss_val, texloss_avg = np.mean(np.asarray(texloss_avg)), []
            psnr = -10.0 * np.log10(texloss_val**2) # PSNR based on average RMSE.
            s = "iter=%d,loss=%f,psnr=%f" % (it, texloss_val, psnr)
            print(s)
            if log_file:
                log_file.write(s + '\n')

        # Show/save result images/textures.
        display_image = display_interval and (it % display_interval) == 0
        save_image = imgsave_interval and (it % imgsave_interval) == 0
        save_texture = texsave_interval and (it % texsave_interval) == 0

        if display_image or save_image:
            result_image = util.run(color_opt, {mtx_in: a_mvp})[0]
            if display_image:
                util.display_image(result_image, size=display_res, title='%d / %d' % (it, max_iter))
            if save_image:
                util.save_image(out_dir + '/' + (imgsave_fn % it), result_image)
        if save_texture:
            # [::-1] flips the texture rows vertically for saving.
            util.save_image(out_dir + '/' + (texsave_fn % it), util.run(tex_var)[::-1])

    # Done.
    if log_file:
        log_file.close()
149 |
150 | #----------------------------------------------------------------------------
151 | # Main function.
152 | #----------------------------------------------------------------------------
153 |
def main():
    """Command-line entry point: parse flags, then run texture fitting on the earth mesh."""
    display_interval = 0
    enable_mip = None

    def usage():
        # Print help text and terminate the process.
        print("Usage: python earth.py [-v] [-mip|-nomip]")
        exit()

    # One of -mip / -nomip is mandatory; -v enables the interactive display.
    for flag in sys.argv[1:]:
        if flag == '-v':
            display_interval = 10
        elif flag in ('-mip', '-nomip'):
            enable_mip = (flag == '-mip')
        else:
            usage()

    if enable_mip is None:
        usage()

    # Initialize TensorFlow.
    util.init_tf()

    # Run.
    out_dir = 'out/earth_mip' if enable_mip else 'out/earth_nomip'
    fit_earth(max_iter=20000, log_interval=10, display_interval=display_interval, enable_mip=enable_mip, out_dir=out_dir, log_fn='log.txt', texsave_interval=1000, texsave_fn='tex_%06d.png', imgsave_interval=1000, imgsave_fn='img_%06d.png')

    # Done.
    print("Done.")
180 |
181 | #----------------------------------------------------------------------------
182 |
183 | if __name__ == "__main__":
184 | main()
185 |
186 | #----------------------------------------------------------------------------
187 |
--------------------------------------------------------------------------------
/samples/tensorflow/envphong.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | import numpy as np
10 | import tensorflow as tf
11 | import os
12 | import sys
13 | import pathlib
14 |
15 | import util
16 |
17 | sys.path.insert(0, os.path.join(sys.path[0], '../..')) # for nvdiffrast
18 | import nvdiffrast.tensorflow as dr
19 |
20 | #----------------------------------------------------------------------------
21 | # Environment map and Phong BRDF learning.
22 | #----------------------------------------------------------------------------
23 |
def fit_env_phong(max_iter          = 1000,
                  log_interval      = 10,
                  display_interval  = None,
                  display_res       = 1024,
                  res               = 1024,
                  lr_base           = 1e-2,
                  lr_ramp           = 1.0,
                  out_dir           = '.',
                  log_fn            = None,
                  imgsave_interval  = None,
                  imgsave_fn        = None):
    """Jointly learn a cube-mapped environment texture and Phong BRDF parameters.

    A reference image is rendered with a known environment map and known Phong
    color/exponent; the optimization recovers both from an image-space L2 loss
    under random viewpoints and light directions. Geometry is fixed, so no
    antialiasing is used.

    Args:
        max_iter:          Number of optimization iterations.
        log_interval:      Iterations between log prints (0/None disables).
        display_interval:  Iterations between interactive displays (0/None disables).
        display_res:       Display window size.
        res:               Rendering resolution.
        lr_base:           Initial learning rate.
        lr_ramp:           Total multiplicative learning rate decay over the run.
        out_dir:           Output directory for logs and images.
        log_fn:            Log file name inside out_dir (None disables file logging).
        imgsave_interval:  Iterations between image saves (0/None disables).
        imgsave_fn:        Image file name pattern with one %d for the iteration.
    """

    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Texture adapted from https://github.com/WaveEngine/Samples/tree/master/Materials/EnvironmentMap/Content/Assets/CubeMap.cubemap
    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    with np.load(f'{datadir}/envphong.npz') as f:
        pos_idx, pos, normals, env = f.values()
    env = env.astype(np.float32)/255.0
    print("Mesh has %d triangles and %d vertices." % (pos_idx.shape[0], pos.shape[0]))

    # Target Phong parameters.
    phong_rgb = np.asarray([1.0, 0.8, 0.6], np.float32)
    phong_exp = 25.0

    # Inputs to TF graph.
    mtx_in = tf.placeholder(tf.float32, [4, 4])
    invmtx_in = tf.placeholder(tf.float32, [4, 4]) # Inverse.
    campos_in = tf.placeholder(tf.float32, [3]) # Camera position in world space.
    lightdir_in = tf.placeholder(tf.float32, [3]) # Light direction.

    # Learned variables: environment maps, phong color, phong exponent.
    env_var = tf.get_variable('env_var', initializer=tf.constant_initializer(0.5), shape=env.shape)
    phong_var_raw = tf.get_variable('phong_var', initializer=tf.random_uniform_initializer(0.0, 1.0), shape=[4]) # R, G, B, exp.
    phong_var = phong_var_raw * [1.0, 1.0, 1.0, 10.0] # Faster learning rate for the exponent.

    # Transform and rasterize.
    viewvec = pos[..., :3] - campos_in[np.newaxis, np.newaxis, :] # View vectors at vertices.
    reflvec = viewvec - 2.0 * normals[tf.newaxis, ...] * tf.reduce_sum(normals[tf.newaxis, ...] * viewvec, axis=-1, keepdims=True) # Reflection vectors at vertices.
    reflvec = reflvec / tf.reduce_sum(reflvec**2, axis=-1, keepdims=True)**0.5 # Normalize.
    pos_clip = tf.matmul(pos, mtx_in, transpose_b=True)[tf.newaxis, ...]
    rast_out, rast_out_db = dr.rasterize(pos_clip, pos_idx, [res, res])
    refl, refld = dr.interpolate(reflvec, rast_out, pos_idx, rast_db=rast_out_db, diff_attrs='all') # Interpolated reflection vectors.

    # Phong light.
    refl = refl / tf.reduce_sum(refl**2, axis=-1, keepdims=True)**0.5 # Normalize.
    ldotr = tf.reduce_sum(-lightdir_in * refl, axis=-1, keepdims=True) # L dot R.

    # Reference color. No need for AA because we are not learning geometry.
    env = np.stack(env)[:, ::-1]
    color = dr.texture(env[np.newaxis, ...], refl, refld, filter_mode='linear-mipmap-linear', boundary_mode='cube')
    color = tf.reduce_sum(tf.stack(color), axis=0)
    color = color + phong_rgb * tf.maximum(0.0, ldotr) ** phong_exp # Phong.
    color = tf.maximum(color, 1.0 - tf.clip_by_value(rast_out[..., -1:], 0, 1)) # White background.

    # Candidate rendering same up to this point, but uses learned texture and Phong parameters instead.
    color_opt = dr.texture(env_var[tf.newaxis, ...], refl, uv_da=refld, filter_mode='linear-mipmap-linear', boundary_mode='cube')
    color_opt = tf.reduce_sum(tf.stack(color_opt), axis=0)
    color_opt = color_opt + phong_var[:3] * tf.maximum(0.0, ldotr) ** phong_var[3] # Phong.
    color_opt = tf.maximum(color_opt, 1.0 - tf.clip_by_value(rast_out[..., -1:], 0, 1)) # White background.

    # Training.
    loss = tf.reduce_mean((color - color_opt)**2) # L2 pixel loss.
    lr_in = tf.placeholder(tf.float32, [])
    train_op = tf.train.AdamOptimizer(lr_in, 0.9, 0.99).minimize(loss, var_list=[env_var, phong_var_raw])

    # Open log file.
    log_file = open(out_dir + '/' + log_fn, 'wt') if log_fn else None

    # Render.
    ang = 0.0
    util.init_uninitialized_vars()
    imgloss_avg, phong_avg = [], []
    for it in range(max_iter + 1):
        # Learning rate ramp: decays from lr_base to lr_base*lr_ramp over the run.
        lr = lr_base * lr_ramp**(float(it)/float(max_iter))

        # Random rotation/translation matrix for optimization.
        r_rot = util.random_rotation_translation(0.25)

        # Smooth rotation for display.
        ang = ang + 0.01
        a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))

        # Modelview and modelview + projection matrices.
        proj = util.projection(x=0.4, n=1.0, f=200.0)
        r_mv = np.matmul(util.translate(0, 0, -3.5), r_rot)
        r_mvp = np.matmul(proj, r_mv).astype(np.float32)
        a_mv = np.matmul(util.translate(0, 0, -3.5), a_rot)
        a_mvp = np.matmul(proj, a_mv).astype(np.float32)

        # Solve camera positions from the inverse modelview matrices.
        a_campos = np.linalg.inv(a_mv)[:3, 3]
        r_campos = np.linalg.inv(r_mv)[:3, 3]

        # Random light direction, normalized.
        lightdir = np.random.normal(size=[3])
        lightdir /= np.linalg.norm(lightdir) + 1e-8

        # Run training and measure image-space RMSE loss.
        imgloss_val, phong_val, _ = util.run([loss, phong_var, train_op], {mtx_in: r_mvp, invmtx_in: np.linalg.inv(r_mvp), campos_in: r_campos, lightdir_in: lightdir, lr_in: lr})
        imgloss_avg.append(imgloss_val**0.5)
        phong_avg.append(phong_val)

        # Print/save log. Values are averaged over the log interval.
        if log_interval and (it % log_interval == 0):
            imgloss_val, imgloss_avg = np.mean(np.asarray(imgloss_avg, np.float32)), []
            phong_val, phong_avg = np.mean(np.asarray(phong_avg, np.float32), axis=0), []
            phong_rgb_rmse = np.mean((phong_val[:3] - phong_rgb)**2)**0.5
            phong_exp_rel_err = np.abs(phong_val[3] - phong_exp)/phong_exp
            s = "iter=%d,phong_rgb_rmse=%f,phong_exp_rel_err=%f,img_rmse=%f" % (it, phong_rgb_rmse, phong_exp_rel_err, imgloss_val)
            print(s)
            if log_file:
                log_file.write(s + '\n')

        # Show/save result image.
        display_image = display_interval and (it % display_interval == 0)
        save_image = imgsave_interval and (it % imgsave_interval == 0)

        if display_image or save_image:
            result_image = util.run(color_opt, {mtx_in: a_mvp, invmtx_in: np.linalg.inv(a_mvp), campos_in: a_campos, lightdir_in: lightdir})[0]
            if display_image:
                util.display_image(result_image, size=display_res, title='%d / %d' % (it, max_iter))
            if save_image:
                util.save_image(out_dir + '/' + (imgsave_fn % it), result_image)

    # Done.
    if log_file:
        log_file.close()
153 |
154 | #----------------------------------------------------------------------------
155 | # Main function.
156 | #----------------------------------------------------------------------------
157 |
def main():
    """Command-line entry point: optional -v flag enables the interactive display."""
    display_interval = 0
    # Any argument other than -v prints usage and exits.
    for flag in sys.argv[1:]:
        if flag != '-v':
            print("Usage: python envphong.py [-v]")
            exit()
        display_interval = 10

    # Initialize TensorFlow.
    util.init_tf()

    # Run.
    fit_env_phong(max_iter=1500, log_interval=10, display_interval=display_interval, out_dir='out/env_phong', log_fn='log.txt', imgsave_interval=100, imgsave_fn='img_%06d.png')

    # Done.
    print("Done.")
175 |
176 | #----------------------------------------------------------------------------
177 |
178 | if __name__ == "__main__":
179 | main()
180 |
181 | #----------------------------------------------------------------------------
182 |
--------------------------------------------------------------------------------
/samples/tensorflow/pose.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | import numpy as np
10 | import tensorflow as tf
11 | import os
12 | import sys
13 | import util
14 | import pathlib
15 |
16 | sys.path.insert(0, os.path.join(sys.path[0], '../..')) # for nvdiffrast
17 | import nvdiffrast.tensorflow as dr
18 |
19 | #----------------------------------------------------------------------------
20 | # Quaternion math.
21 | #----------------------------------------------------------------------------
22 |
# Identity quaternion [1, 0, 0, 0] (scalar-first layout) as float32.
def q_unit():
    q = np.zeros(4, np.float32)
    q[0] = 1.0
    return q
26 |
# Draw a uniformly distributed random unit quaternion (Shoemake's method).
def q_rnd():
    u, v, w = np.random.uniform(0.0, 1.0, size=[3])
    a, b = np.sqrt(1.0 - u), np.sqrt(u)
    theta1 = 2.0 * np.pi * v
    theta2 = 2.0 * np.pi * w
    return np.asarray([a * np.sin(theta1), a * np.cos(theta1), b * np.sin(theta2), b * np.cos(theta2)], np.float32)
33 |
# The 24 rotations of the octahedral symmetry group S_4, as unit quaternions.
_r2 = 0.5**0.5
_q_S4 = [[ 1.0, 0.0, 0.0, 0.0], [ 0.0, 1.0, 0.0, 0.0], [ 0.0, 0.0, 1.0, 0.0], [ 0.0, 0.0, 0.0, 1.0],
         [-0.5, 0.5, 0.5, 0.5], [-0.5,-0.5,-0.5, 0.5], [ 0.5,-0.5, 0.5, 0.5], [ 0.5, 0.5,-0.5, 0.5],
         [ 0.5, 0.5, 0.5, 0.5], [-0.5, 0.5,-0.5, 0.5], [ 0.5,-0.5,-0.5, 0.5], [-0.5,-0.5, 0.5, 0.5],
         [ _r2,-_r2, 0.0, 0.0], [ _r2, _r2, 0.0, 0.0], [ 0.0, 0.0, _r2, _r2], [ 0.0, 0.0,-_r2, _r2],
         [ 0.0, _r2, _r2, 0.0], [ _r2, 0.0, 0.0,-_r2], [ _r2, 0.0, 0.0, _r2], [ 0.0,-_r2, _r2, 0.0],
         [ _r2, 0.0, _r2, 0.0], [ 0.0, _r2, 0.0, _r2], [ _r2, 0.0,-_r2, 0.0], [ 0.0,-_r2, 0.0, _r2]]

# Pick one of the 24 group elements uniformly at random.
def q_rnd_S4():
    idx = np.random.randint(24)
    return np.asarray(_q_S4[idx], np.float32)
44 |
# Quaternion slerp.
def q_slerp(p, q, t):
    """Spherical linear interpolation from p to q by fraction t."""
    cos_half = np.dot(p, q)
    # Flip q to take the shorter arc on the 4D unit sphere.
    if cos_half < 0.0:
        q = -q
        cos_half = -cos_half
    # Nearly parallel: normalized lerp avoids dividing by a tiny sine.
    if cos_half > 0.999:
        lerped = p + t * (q - p)
        return lerped / np.linalg.norm(lerped)
    half_angle = np.arccos(cos_half)
    sin_half = np.sin(half_angle)
    w_q = np.sin(half_angle * t) / sin_half
    w_p = np.cos(half_angle * t) - cos_half * w_q
    return w_p * p + w_q * q
61 |
# Quaternion scale: slerp between the identity quaternion and q by scl.
def q_scale(q, scl):
    identity = q_unit()
    return q_slerp(identity, q, scl)
65 |
# Hamilton product of two quaternions (scalar-first layout), as float32.
def q_mul(p, q):
    ps, pv = p[0], p[1:]
    qs, qv = q[0], q[1:]
    scalar = ps * qs - np.dot(pv, qv)
    vec = ps * qv + qs * pv + np.cross(pv, qv)
    return np.asarray([scalar, vec[0], vec[1], vec[2]], np.float32)
73 |
# Angular difference between two quaternions in degrees.
def q_angle_deg(p, q):
    # |<p,q>| is the cosine of half the rotation angle; clamp so rounding
    # slightly above 1 cannot break arccos.
    cos_half = min(np.abs(np.dot(p, q)), 1.0)
    half_angle = np.arccos(cos_half)
    return np.degrees(half_angle * 2.0)
79 |
# Quaternion product in TensorFlow (scalar-first layout, same math as q_mul).
def q_mul_tf(p, q):
    w = p[0]*q[0] - p[1]*q[1] - p[2]*q[2] - p[3]*q[3]  # Scalar part.
    x = p[0]*q[1] + p[1]*q[0] + p[2]*q[3] - p[3]*q[2]
    y = p[0]*q[2] + p[2]*q[0] + p[3]*q[1] - p[1]*q[3]
    z = p[0]*q[3] + p[3]*q[0] + p[1]*q[2] - p[2]*q[1]
    return tf.stack([w, x, y, z])
87 |
# Convert quaternion to 4x4 rotation matrix. TensorFlow.
def q_to_mtx_tf(q):
    """Build a homogeneous 4x4 rotation matrix from quaternion q.

    NOTE(review): the formulas below treat q[0..2] as the vector part
    (x, y, z) and q[3] as the scalar part, whereas q_unit/q_mul_tf use a
    scalar-first layout -- confirm the intended component convention.
    """
    r0 = tf.stack([1.0-2.0*q[1]**2 - 2.0*q[2]**2, 2.0*q[0]*q[1] - 2.0*q[2]*q[3], 2.0*q[0]*q[2] + 2.0*q[1]*q[3]])
    r1 = tf.stack([2.0*q[0]*q[1] + 2.0*q[2]*q[3], 1.0 - 2.0*q[0]**2 - 2.0*q[2]**2, 2.0*q[1]*q[2] - 2.0*q[0]*q[3]])
    r2 = tf.stack([2.0*q[0]*q[2] - 2.0*q[1]*q[3], 2.0*q[1]*q[2] + 2.0*q[0]*q[3], 1.0 - 2.0*q[0]**2 - 2.0*q[1]**2])
    rr = tf.transpose(tf.stack([r0, r1, r2]), [1, 0])  # Transpose the stacked 3x3.
    rr = tf.concat([rr, tf.convert_to_tensor([[0], [0], [0]], tf.float32)], axis=1) # Pad right column.
    rr = tf.concat([rr, tf.convert_to_tensor([[0, 0, 0, 1]], tf.float32)], axis=0) # Pad bottom row.
    return rr
97 |
98 | #----------------------------------------------------------------------------
99 | # Cube pose fitter.
100 | #----------------------------------------------------------------------------
101 |
def fit_pose(max_iter = 10000,          # Total iterations per repeat.
             repeats = 1,               # Number of independent optimization runs.
             log_interval = 10,         # Iterations between log prints; falsy disables.
             display_interval = None,   # Iterations between interactive displays; falsy disables.
             display_res = 512,         # Display window size in pixels.
             lr_base = 0.01,            # Initial Adam learning rate.
             lr_falloff = 1.0,          # Learning-rate decay factor over the full run (1.0 = constant).
             nr_base = 1.0,             # Initial mollification noise rate.
             nr_falloff = 1e-4,         # Noise-rate decay factor over the full run.
             grad_phase_start = 0.5,    # Fraction of iterations after which the gradient phase begins.
             resolution = 256,          # Rendering resolution.
             out_dir = '.',             # Output directory for log/images.
             log_fn = None,             # Log file name, or None to disable file logging.
             imgsave_interval = None,   # Iterations between image saves; falsy disables.
             imgsave_fn = None):        # Image file name pattern with (rep, iter) placeholders.
    """Recover an unknown cube pose (a quaternion) from rendered images.

    Two-phase optimization: while it/max_iter < grad_phase_start, random
    mollification noise perturbs the pose and the best pose found so far is
    kept (greedy phase); afterwards, Adam refines the pose variable through
    the differentiable renderer (gradient phase).
    """
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Load the cube mesh: triangle/color index buffers plus per-vertex data.
    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    with np.load(f'{datadir}/cube_p.npz') as f:
        pos_idx, pos, col_idx, col = f.values()
    print("Mesh has %d triangles and %d vertices." % (pos_idx.shape[0], pos.shape[0]))

    # Transformation matrix input to TF graph.
    mtx_in = tf.placeholder(tf.float32, [4, 4])

    # Pose matrix input to TF graph.
    pose_in = tf.placeholder(tf.float32, [4]) # Quaternion.
    noise_in = tf.placeholder(tf.float32, [4]) # Mollification noise.

    # Setup TF graph for reference.
    mtx_total = tf.matmul(mtx_in, q_to_mtx_tf(pose_in))
    pos_clip = tf.matmul(pos, mtx_total, transpose_b=True)[tf.newaxis, ...]
    rast_out, _ = dr.rasterize(pos_clip, pos_idx, resolution=[resolution, resolution], output_db=False)
    color, _ = dr.interpolate(col[tf.newaxis, ...], rast_out, col_idx)
    color = dr.antialias(color, rast_out, pos_clip, pos_idx)

    # Setup TF graph for optimization candidate.
    pose_var = tf.get_variable('pose', initializer=tf.zeros_initializer(), shape=[4])
    pose_var_in = tf.placeholder(tf.float32, [4])
    pose_set = tf.assign(pose_var, pose_var_in)
    pose_norm_op = tf.assign(pose_var, pose_var / tf.reduce_sum(pose_var**2)**0.5) # Normalization operation.
    pose_total = q_mul_tf(pose_var, noise_in)  # Candidate pose = variable composed with noise.
    mtx_total_opt = tf.matmul(mtx_in, q_to_mtx_tf(pose_total))
    pos_clip_opt = tf.matmul(pos, mtx_total_opt, transpose_b=True)[tf.newaxis, ...]
    rast_out_opt, _ = dr.rasterize(pos_clip_opt, pos_idx, resolution=[resolution, resolution], output_db=False)
    color_opt, _ = dr.interpolate(col[tf.newaxis, ...], rast_out_opt, col_idx)
    color_opt = dr.antialias(color_opt, rast_out_opt, pos_clip_opt, pos_idx)

    # Image-space loss.
    diff = (color_opt - color)**2 # L2 norm.
    diff = tf.tanh(5.0 * tf.reduce_max(diff, axis=-1)) # Add some oomph to the loss.
    loss = tf.reduce_mean(diff)
    lr_in = tf.placeholder(tf.float32, [])
    train_op = tf.train.AdamOptimizer(lr_in, 0.9, 0.999).minimize(loss, var_list=[pose_var])

    # Open log file.
    log_file = open(out_dir + '/' + log_fn, 'wt') if log_fn else None

    # Repeats.
    for rep in range(repeats):

        # Optimize.
        util.init_uninitialized_vars()
        loss_best = np.inf
        pose_best = None
        for it in range(max_iter + 1):
            # Modelview + projection matrix.
            mvp = np.matmul(util.projection(x=0.4), util.translate(0, 0, -3.5)).astype(np.float32)

            # Learning and noise rate scheduling.
            itf = 1.0 * it / max_iter
            lr = lr_base * lr_falloff**itf
            nr = nr_base * nr_falloff**itf

            # Noise input: no noise in the gradient phase, random orientation
            # noise scaled by the decaying noise rate in the greedy phase.
            if itf >= grad_phase_start:
                noise = q_unit()
            else:
                noise = q_scale(q_rnd(), nr)
                noise = q_mul(noise, q_rnd_S4()) # Orientation noise.

            # Initialize optimization.
            if it == 0:
                pose_target = q_rnd()
                util.run(pose_set, {pose_var_in: q_rnd()})
                util.run(pose_norm_op)
                util.run(loss, {mtx_in: mvp, pose_in: pose_target, noise_in: noise}) # Pipecleaning pass.

            # Run gradient training step.
            if itf >= grad_phase_start:
                util.run(train_op, {mtx_in: mvp, pose_in: pose_target, noise_in: noise, lr_in: lr})
            util.run(pose_norm_op)  # Keep the pose variable a unit quaternion.

            # Measure image-space loss and update best found pose.
            loss_val = util.run(loss, {mtx_in: mvp, pose_in: pose_target, noise_in: noise, lr_in: lr})
            if loss_val < loss_best:
                pose_best = util.run(pose_total, {noise_in: noise})
                if loss_val > 0.0:
                    loss_best = loss_val
            else:
                # Return to best pose in the greedy phase.
                if itf < grad_phase_start:
                    util.run(pose_set, {pose_var_in: pose_best})

            # Print/save log.
            if log_interval and (it % log_interval == 0):
                err = q_angle_deg(util.run(pose_var), pose_target)
                ebest = q_angle_deg(pose_best, pose_target)
                s = "rep=%d,iter=%d,err=%f,err_best=%f,loss=%f,loss_best=%f,lr=%f,nr=%f" % (rep, it, err, ebest, loss_val, loss_best, lr, nr)
                print(s)
                if log_file:
                    log_file.write(s + "\n")

            # Show/save image.
            display_image = display_interval and (it % display_interval == 0)
            save_image = imgsave_interval and (it % imgsave_interval == 0)

            if display_image or save_image:
                img_ref, img_opt = util.run([color, color_opt], {mtx_in: mvp, pose_in: pose_target, noise_in: noise})
                img_best, = util.run([color_opt], {mtx_in: mvp, pose_in: pose_best, noise_in: q_unit()})
                img_ref = img_ref[0]
                img_opt = img_opt[0]
                img_best = img_best[0]
                # Side-by-side: reference | best-so-far | current candidate.
                result_image = np.concatenate([img_ref, img_best, img_opt], axis=1)

                if display_image:
                    util.display_image(result_image, size=display_res, title='(%d) %d / %d' % (rep, it, max_iter))
                if save_image:
                    util.save_image(out_dir + '/' + (imgsave_fn % (rep, it)), result_image)

    # All repeats done.
    if log_file:
        log_file.close()
237 |
238 | #----------------------------------------------------------------------------
239 | # Main function.
240 | #----------------------------------------------------------------------------
241 |
def main():
    # Parse the command line: '-v' enables display, a bare decimal number
    # sets the repeat count, anything else shows usage and exits.
    display_interval = 0
    repeats = 1

    def usage():
        print("Usage: python pose.py [-v] [repeats]")
        exit()

    for arg in sys.argv[1:]:
        if arg == '-v':
            display_interval = 10
        elif arg.isascii() and arg.isdecimal():
            repeats = int(arg)
        else:
            usage()

    if repeats <= 0:
        usage()

    # Initialize TensorFlow.
    util.init_tf()

    # Run.
    fit_pose(max_iter=1000, repeats=repeats, log_interval=100, display_interval=display_interval, out_dir='out/pose', log_fn='log.txt', imgsave_interval=1000, imgsave_fn='img_%03d_%06d.png')

    # Done.
    print("Done.")

#----------------------------------------------------------------------------

if __name__ == "__main__":
    main()
274 |
275 | #----------------------------------------------------------------------------
276 |
--------------------------------------------------------------------------------
/samples/tensorflow/triangle.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
import imageio
import logging
import os
import numpy as np
import tensorflow as tf
import nvdiffrast.tensorflow as dr

# Silence deprecation warnings and debug level logging
logging.getLogger('tensorflow').setLevel(logging.ERROR)
os.environ.setdefault('TF_CPP_MIN_LOG_LEVEL', '1')

# One triangle given directly in clip space (x, y, z, w), with per-vertex
# RGB colors and a single index triple.
pos = tf.convert_to_tensor([[[-0.8, -0.8, 0, 1], [0.8, -0.8, 0, 1], [-0.8, 0.8, 0, 1]]], dtype=tf.float32)
col = tf.convert_to_tensor([[[1, 0, 0], [0, 1, 0], [0, 0, 1]]], dtype=tf.float32)
tri = tf.convert_to_tensor([[0, 1, 2]], dtype=tf.int32)

# Rasterize at 256x256 and interpolate the vertex colors across the coverage.
rast, _ = dr.rasterize(pos, tri, resolution=[256, 256])
out, _ = dr.interpolate(col, rast, tri)

# Evaluate the graph to get the rendered image as a NumPy array.
with tf.Session() as sess:
    img = sess.run(out)

img = img[0, ::-1, :, :] # Flip vertically.
img = np.clip(np.rint(img * 255), 0, 255).astype(np.uint8) # Quantize to np.uint8

print("Saving to 'tri.png'.")
imageio.imsave('tri.png', img)
35 |
--------------------------------------------------------------------------------
/samples/tensorflow/util.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 |
10 | import os
11 | import numpy as np
12 | import tensorflow as tf
13 |
14 | # Silence deprecation warnings from TensorFlow 1.13 onwards
15 | import logging
16 | logging.getLogger('tensorflow').setLevel(logging.ERROR)
17 |
18 | from typing import Any, List
19 |
20 | #----------------------------------------------------------------------------
21 | # Projection and transformation matrix helpers.
22 | #----------------------------------------------------------------------------
23 |
def projection(x=0.1, n=1.0, f=50.0):
    """Perspective projection matrix (4x4, float32).

    x: half-extent of the near plane, n/f: near/far clip distances.
    The Y row is negated, flipping the vertical axis.
    """
    mtx = np.zeros((4, 4), dtype=np.float32)
    mtx[0, 0] = n / x
    mtx[1, 1] = -n / x
    mtx[2, 2] = -(f + n) / (f - n)
    mtx[2, 3] = -(2 * f * n) / (f - n)
    mtx[3, 2] = -1.0
    return mtx
29 |
def translate(x, y, z):
    """Homogeneous 4x4 translation matrix for offset (x, y, z), float32."""
    mtx = np.eye(4, dtype=np.float32)
    mtx[0, 3] = x
    mtx[1, 3] = y
    mtx[2, 3] = z
    return mtx
35 |
def rotate_x(a):
    """Homogeneous 4x4 rotation matrix about the X axis by angle a (radians)."""
    s, c = np.sin(a), np.cos(a)
    mtx = np.eye(4, dtype=np.float32)
    mtx[1, 1] = c
    mtx[1, 2] = s
    mtx[2, 1] = -s
    mtx[2, 2] = c
    return mtx
42 |
def rotate_y(a):
    """Homogeneous 4x4 rotation matrix about the Y axis by angle a (radians)."""
    s, c = np.sin(a), np.cos(a)
    mtx = np.eye(4, dtype=np.float32)
    mtx[0, 0] = c
    mtx[0, 2] = s
    mtx[2, 0] = -s
    mtx[2, 2] = c
    return mtx
49 |
def random_rotation_translation(t):
    """Random rigid transform: a random rotation plus a translation whose
    components are drawn from U(-t, t). Returns a homogeneous 4x4 matrix."""
    basis = np.random.normal(size=[3, 3])
    # Orthogonalize: rebuild rows 1 and 2 via cross products, then normalize
    # each row, yielding an orthonormal right-handed frame.
    basis[1] = np.cross(basis[0], basis[2])
    basis[2] = np.cross(basis[0], basis[1])
    basis = basis / np.linalg.norm(basis, axis=1, keepdims=True)
    mtx = np.pad(basis, [[0, 1], [0, 1]], mode='constant')
    mtx[3, 3] = 1.0
    mtx[:3, 3] = np.random.uniform(-t, t, size=[3])
    return mtx
59 |
60 | #----------------------------------------------------------------------------
61 | # Bilinear downsample by 2x.
62 | #----------------------------------------------------------------------------
63 |
def bilinear_downsample(x):
    """Downsample an NHWC tensor by 2x with a 4x4 bilinear kernel.

    The weight matrix is the outer product of [1, 3, 3, 1] with itself,
    normalized to sum to 1, applied per-channel (identity across channels)
    via a stride-2 'SAME' convolution.
    NOTE(review): x.shape[-1].value is TF1-style Dimension access and
    requires a statically known channel count.
    """
    w = tf.constant([[1, 3, 3, 1], [3, 9, 9, 3], [3, 9, 9, 3], [1, 3, 3, 1]], dtype=tf.float32) / 64.0
    w = w[..., tf.newaxis, tf.newaxis] * tf.eye(x.shape[-1].value, batch_shape=[1, 1])  # Per-channel kernel.
    x = tf.nn.conv2d(x, w, strides=2, padding='SAME')
    return x
69 |
70 | #----------------------------------------------------------------------------
71 | # Image display function using OpenGL.
72 | #----------------------------------------------------------------------------
73 |
_glfw_window = None  # Lazily created singleton GLFW window, reused across calls.
def display_image(image, zoom=None, size=None, title=None): # HWC
    """Display an HWC image (uint8 or float32; 1/2/3 channels) in a GLFW window.

    size: target window height in pixels; an integer zoom factor is derived
    from it (mutually exclusive with zoom). Returns False once the user has
    requested the window to close, True otherwise.
    """
    # Import OpenGL and glfw.
    import OpenGL.GL as gl
    import glfw

    # Zoom image if requested.
    image = np.asarray(image)
    if size is not None:
        assert zoom is None
        zoom = max(1, size // image.shape[0])
    if zoom is not None:
        image = image.repeat(zoom, axis=0).repeat(zoom, axis=1)
    height, width, channels = image.shape

    # Initialize window.
    if title is None:
        title = 'Debug window'
    global _glfw_window
    if _glfw_window is None:
        glfw.init()
        _glfw_window = glfw.create_window(width, height, title, None, None)
        glfw.make_context_current(_glfw_window)
        glfw.show_window(_glfw_window)
        glfw.swap_interval(0)  # Disable vsync so display does not throttle the caller.
    else:
        glfw.make_context_current(_glfw_window)
        glfw.set_window_title(_glfw_window, title)
        glfw.set_window_size(_glfw_window, width, height)

    # Update window.
    glfw.poll_events()
    gl.glClearColor(0, 0, 0, 1)
    gl.glClear(gl.GL_COLOR_BUFFER_BIT)
    gl.glWindowPos2f(0, 0)
    gl.glPixelStorei(gl.GL_UNPACK_ALIGNMENT, 1)  # Image rows may not be 4-byte aligned.
    gl_format = {3: gl.GL_RGB, 2: gl.GL_RG, 1: gl.GL_LUMINANCE}[channels]
    gl_dtype = {'uint8': gl.GL_UNSIGNED_BYTE, 'float32': gl.GL_FLOAT}[image.dtype.name]
    gl.glDrawPixels(width, height, gl_format, gl_dtype, image[::-1])  # Flip: GL rows are bottom-up.
    glfw.swap_buffers(_glfw_window)
    if glfw.window_should_close(_glfw_window):
        return False
    return True
117 |
118 | #----------------------------------------------------------------------------
119 | # Image save helper.
120 | #----------------------------------------------------------------------------
121 |
def save_image(fn, x):
    """Quantize a float image x (expected in [0, 1]) to uint8 and write it to fn."""
    import imageio
    quantized = np.clip(np.rint(x * 255.0), 0, 255).astype(np.uint8)
    imageio.imsave(fn, quantized)
127 |
128 | #----------------------------------------------------------------------------
129 |
130 | # TensorFlow utilities
131 |
132 | #----------------------------------------------------------------------------
133 |
def _sanitize_tf_config(config_dict: dict = None) -> dict:
    """Build the effective TF setup config dict.

    Keys are dotted: 'rnd.*' controls seeding, 'env.*' maps to os.environ,
    anything else addresses tf.ConfigProto fields. Defaults for environment
    variables that are already set are dropped, and user overrides from
    config_dict are applied last.
    """
    # Defaults.
    cfg = {
        "rnd.np_random_seed": None,            # Random seed for NumPy. None = keep as is.
        "rnd.tf_random_seed": "auto",          # Random seed for TensorFlow. 'auto' = derive from NumPy random state. None = keep as is.
        "env.TF_CPP_MIN_LOG_LEVEL": "1",       # 0 = Print all available debug info from TensorFlow. 1 = Print warnings and errors, but disable debug info.
        "env.HDF5_USE_FILE_LOCKING": "FALSE",  # Disable HDF5 file locking to avoid concurrency issues with network shares.
        "graph_options.place_pruned_graph": True,  # False = Check that all ops are available on the designated device. True = Skip the check for ops that are not used.
        "gpu_options.allow_growth": True,      # False = Allocate all GPU memory at the beginning. True = Allocate only as much GPU memory as needed.
    }

    # Remove defaults for environment variables that are already set.
    for key in list(cfg):
        fields = key.split(".")
        if fields[0] == "env":
            assert len(fields) == 2
            if fields[1] in os.environ:
                del cfg[key]

    # User overrides.
    if config_dict is not None:
        cfg.update(config_dict)
    return cfg
156 |
157 |
def init_tf(config_dict: dict = None) -> None:
    """Initialize TensorFlow session using good default settings.

    config_dict: optional overrides merged over the defaults declared in
    _sanitize_tf_config(). No-op if a default session already exists.
    """
    # Skip if already initialized.
    if tf.get_default_session() is not None:
        return

    # Setup config dict and random seeds.
    cfg = _sanitize_tf_config(config_dict)
    np_random_seed = cfg["rnd.np_random_seed"]
    if np_random_seed is not None:
        np.random.seed(np_random_seed)
    tf_random_seed = cfg["rnd.tf_random_seed"]
    if tf_random_seed == "auto":
        # Derive the TF seed from the (possibly just seeded) NumPy RNG.
        tf_random_seed = np.random.randint(1 << 31)
    if tf_random_seed is not None:
        tf.set_random_seed(tf_random_seed)

    # Setup environment variables.
    for key, value in cfg.items():
        fields = key.split(".")
        if fields[0] == "env":
            assert len(fields) == 2
            os.environ[fields[1]] = str(value)

    # Create default TensorFlow session.
    create_session(cfg, force_as_default=True)
184 |
185 |
def assert_tf_initialized():
    """Raise RuntimeError unless a default TensorFlow session exists."""
    session = tf.get_default_session()
    if session is None:
        raise RuntimeError("No default TensorFlow session found. Please call util.init_tf().")
190 |
191 |
def create_session(config_dict: dict = None, force_as_default: bool = False) -> tf.Session:
    """Create tf.Session based on config dict."""
    # Setup TensorFlow config proto.
    cfg = _sanitize_tf_config(config_dict)
    config_proto = tf.ConfigProto()
    for key, value in cfg.items():
        fields = key.split(".")
        if fields[0] not in ["rnd", "env"]:
            # A dotted key such as 'gpu_options.allow_growth' addresses a
            # nested ConfigProto field: walk to the parent message, set the leaf.
            obj = config_proto
            for field in fields[:-1]:
                obj = getattr(obj, field)
            setattr(obj, fields[-1], value)

    # Create session.
    session = tf.Session(config=config_proto)
    if force_as_default:
        # Install the session as the process-wide default without a 'with'
        # block by entering its context manager and keeping it open.
        # pylint: disable=protected-access
        session._default_session = session.as_default()
        session._default_session.enforce_nesting = False
        session._default_session.__enter__()
    return session
213 |
214 |
def is_tf_expression(x: Any) -> bool:
    """True if x is a TensorFlow Tensor, Variable, or Operation."""
    tf_types = (tf.Tensor, tf.Variable, tf.Operation)
    return isinstance(x, tf_types)
218 |
219 |
def absolute_name_scope(scope: str) -> tf.name_scope:
    """Forcefully enter the specified name scope, ignoring any surrounding scopes."""
    # A trailing '/' makes TensorFlow treat the scope name as absolute.
    return tf.name_scope(f"{scope}/")
223 |
224 |
def init_uninitialized_vars(target_vars: List[tf.Variable] = None) -> None:
    """Initialize all tf.Variables that have not already been initialized.

    Equivalent to the following, but more efficient and does not bloat the tf graph:
    tf.variables_initializer(tf.report_uninitialized_variables()).run()
    """
    assert_tf_initialized()
    if target_vars is None:
        target_vars = tf.global_variables()

    test_vars = []  # Variables whose init status still needs to be queried.
    test_ops = []   # Matching tf.is_variable_initialized ops, one per test_var.

    with tf.control_dependencies(None): # ignore surrounding control_dependencies
        for var in target_vars:
            assert is_tf_expression(var)

            try:
                # If an IsVariableInitialized op already exists for this
                # variable, it has been handled before; skip it.
                tf.get_default_graph().get_tensor_by_name(var.name.replace(":0", "/IsVariableInitialized:0"))
            except KeyError:
                # Op does not exist => variable may be uninitialized.
                test_vars.append(var)

                with absolute_name_scope(var.name.split(":")[0]):
                    test_ops.append(tf.is_variable_initialized(var))

    # Query all statuses in a single session call, then run the initializers
    # of only the variables that are actually uninitialized.
    init_vars = [var for var, inited in zip(test_vars, run(test_ops)) if not inited]
    run([var.initializer for var in init_vars])
253 |
def run(*args, **kwargs) -> Any:
    """Run the specified ops in the default session."""
    assert_tf_initialized()
    session = tf.get_default_session()
    return session.run(*args, **kwargs)
258 |
--------------------------------------------------------------------------------
/samples/torch/cube.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | import argparse
10 | import os
11 | import pathlib
12 | import sys
13 | import numpy as np
14 | import torch
15 | import imageio
16 |
17 | import util
18 |
19 | import nvdiffrast.torch as dr
20 |
# Transform vertex positions to clip space
def transform_pos(mtx, pos):
    # Accept either a NumPy matrix (moved to the GPU) or a torch tensor as-is.
    if isinstance(mtx, np.ndarray):
        t_mtx = torch.from_numpy(mtx).cuda()
    else:
        t_mtx = mtx
    # Homogenize: (x,y,z) -> (x,y,z,1).
    ones = torch.ones([pos.shape[0], 1]).cuda()
    posw = torch.cat([pos, ones], axis=1)
    # Row-vector convention: multiply by the transposed matrix, add batch dim.
    return torch.matmul(posw, t_mtx.t())[None, ...]
27 |
def render(glctx, mtx, pos, pos_idx, vtx_col, col_idx, resolution: int):
    # Rasterize at a square resolution, interpolate per-vertex colors over
    # the coverage, then antialias silhouette edges.
    clip_pos = transform_pos(mtx, pos)
    rast, _ = dr.rasterize(glctx, clip_pos, pos_idx, resolution=[resolution, resolution])
    shaded, _ = dr.interpolate(vtx_col[None, ...], rast, col_idx)
    return dr.antialias(shaded, rast, clip_pos, pos_idx)
34 |
def make_grid(arr, ncols=2):
    """Tile a batch of HWC images (N, H, W, C) into a single
    (H*nrows, W*ncols, C) image; N must be divisible by ncols."""
    count, height, width, channels = arr.shape
    nrows, rem = divmod(count, ncols)
    assert rem == 0
    tiled = arr.reshape(nrows, ncols, height, width, channels)
    tiled = tiled.swapaxes(1, 2)
    return tiled.reshape(height * nrows, width * ncols, channels)
40 |
def fit_cube(max_iter = 5000,            # Total optimization iterations.
             resolution = 4,             # Training render resolution in pixels.
             discontinuous = False,      # Use the vertex-discontinuous cube variant.
             repeats = 1,                # Number of independent optimization runs.
             log_interval = 10,          # Iterations between log prints; falsy disables.
             display_interval = None,    # Iterations between interactive displays; falsy disables.
             display_res = 512,          # Display/high-resolution render size.
             out_dir = None,             # Output directory; None disables all file output.
             log_fn = None,              # Log file name inside out_dir.
             mp4save_interval = None,    # Iterations between video frames; 0 disables video.
             mp4save_fn = None,          # Video file name inside out_dir.
             use_opengl = False):        # Use the OpenGL rasterizer instead of CUDA.
    """Jointly optimize cube vertex positions and colors to match renders of
    a reference cube under random poses at a (typically low) resolution.
    """
    log_file = None
    writer = None
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
        if log_fn:
            log_file = open(f'{out_dir}/{log_fn}', 'wt')
        if mp4save_interval != 0:
            # NOTE(review): a None interval also passes this test and opens a
            # writer named f'{out_dir}/None' -- confirm callers pass 0 (not
            # None) to disable video when out_dir is set.
            writer = imageio.get_writer(f'{out_dir}/{mp4save_fn}', mode='I', fps=30, codec='libx264', bitrate='16M')
    else:
        mp4save_interval = None

    # Load reference cube mesh: triangle/color indices plus per-vertex data.
    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    fn = 'cube_%s.npz' % ('d' if discontinuous else 'c')
    with np.load(f'{datadir}/{fn}') as f:
        pos_idx, vtxp, col_idx, vtxc = f.values()
    print("Mesh has %d triangles and %d vertices." % (pos_idx.shape[0], vtxp.shape[0]))

    # Create position/triangle index tensors
    pos_idx = torch.from_numpy(pos_idx.astype(np.int32)).cuda()
    col_idx = torch.from_numpy(col_idx.astype(np.int32)).cuda()
    vtx_pos = torch.from_numpy(vtxp.astype(np.float32)).cuda()
    vtx_col = torch.from_numpy(vtxc.astype(np.float32)).cuda()

    # Rasterizer context
    glctx = dr.RasterizeGLContext() if use_opengl else dr.RasterizeCudaContext()

    # Repeats.
    for rep in range(repeats):

        ang = 0.0       # Display rotation angle, advanced only when rendering.
        gl_avg = []     # Geometric errors accumulated since the last log print.

        # Random initial guess: jittered positions, uniformly random colors.
        vtx_pos_rand = np.random.uniform(-0.5, 0.5, size=vtxp.shape) + vtxp
        vtx_col_rand = np.random.uniform(0.0, 1.0, size=vtxc.shape)
        vtx_pos_opt = torch.tensor(vtx_pos_rand, dtype=torch.float32, device='cuda', requires_grad=True)
        vtx_col_opt = torch.tensor(vtx_col_rand, dtype=torch.float32, device='cuda', requires_grad=True)

        # Adam optimizer for vertex position and color with a learning rate ramp.
        optimizer = torch.optim.Adam([vtx_pos_opt, vtx_col_opt], lr=1e-2)
        scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda x: max(0.01, 10**(-x*0.0005)))

        for it in range(max_iter + 1):
            # Random rotation/translation matrix for optimization.
            r_rot = util.random_rotation_translation(0.25)

            # Smooth rotation for display.
            a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))

            # Modelview and modelview + projection matrices.
            proj = util.projection(x=0.4)
            r_mv = np.matmul(util.translate(0, 0, -3.5), r_rot)
            r_mvp = np.matmul(proj, r_mv).astype(np.float32)
            a_mv = np.matmul(util.translate(0, 0, -3.5), a_rot)
            a_mvp = np.matmul(proj, a_mv).astype(np.float32)

            # Compute geometric error for logging: mean distance of abs(vertex)
            # from the reference cube corner (0.5, 0.5, 0.5).
            with torch.no_grad():
                geom_loss = torch.mean(torch.sum((torch.abs(vtx_pos_opt) - .5)**2, dim=1)**0.5)
                gl_avg.append(float(geom_loss))

            # Print/save log.
            if log_interval and (it % log_interval == 0):
                gl_val = np.mean(np.asarray(gl_avg))
                gl_avg = []
                s = ("rep=%d," % rep) if repeats > 1 else ""
                s += "iter=%d,err=%f" % (it, gl_val)
                print(s)
                if log_file:
                    log_file.write(s + "\n")

            # Render reference and candidate frames at the training resolution.
            color = render(glctx, r_mvp, vtx_pos, pos_idx, vtx_col, col_idx, resolution)
            color_opt = render(glctx, r_mvp, vtx_pos_opt, pos_idx, vtx_col_opt, col_idx, resolution)

            # Compute loss and train.
            loss = torch.mean((color - color_opt)**2) # L2 pixel loss.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()

            # Show/save image.
            display_image = display_interval and (it % display_interval == 0)
            save_mp4 = mp4save_interval and (it % mp4save_interval == 0)

            if display_image or save_mp4:
                ang = ang + 0.01

                img_b = color[0].cpu().numpy()[::-1]
                img_o = color_opt[0].detach().cpu().numpy()[::-1]
                img_d = render(glctx, a_mvp, vtx_pos_opt, pos_idx, vtx_col_opt, col_idx, display_res)[0]
                img_r = render(glctx, a_mvp, vtx_pos, pos_idx, vtx_col, col_idx, display_res)[0]

                # Upscale the low-resolution renders to the display size.
                scl = display_res // img_o.shape[0]
                img_b = np.repeat(np.repeat(img_b, scl, axis=0), scl, axis=1)
                img_o = np.repeat(np.repeat(img_o, scl, axis=0), scl, axis=1)
                result_image = make_grid(np.stack([img_o, img_b, img_d.detach().cpu().numpy()[::-1], img_r.cpu().numpy()[::-1]]))

                if display_image:
                    util.display_image(result_image, size=display_res, title='%d / %d' % (it, max_iter))
                if save_mp4:
                    writer.append_data(np.clip(np.rint(result_image*255.0), 0, 255).astype(np.uint8))

    # Done.
    if writer is not None:
        writer.close()
    if log_file:
        log_file.close()
161 |
162 | #----------------------------------------------------------------------------
163 |
def main():
    """Command-line entry point for the cube fitting example."""
    parser = argparse.ArgumentParser(description='Cube fit example')
    parser.add_argument('--opengl', help='enable OpenGL rendering', action='store_true', default=False)
    parser.add_argument('--outdir', help='specify output directory', default='')
    parser.add_argument('--discontinuous', action='store_true', default=False)
    # 'required=True' guarantees a value is always supplied, so a 'default'
    # would be dead code; the redundant 'default=0' has been dropped.
    parser.add_argument('--resolution', type=int, required=True)
    parser.add_argument('--display-interval', type=int, default=0)
    parser.add_argument('--mp4save-interval', type=int, default=100)
    parser.add_argument('--max-iter', type=int, default=1000)
    args = parser.parse_args()

    # Set up logging.
    if args.outdir:
        ds = 'd' if args.discontinuous else 'c'
        out_dir = f'{args.outdir}/cube_{ds}_{args.resolution}'
        print(f'Saving results under {out_dir}')
    else:
        out_dir = None
        print('No output directory specified, not saving log or images')

    # Run.
    fit_cube(
        max_iter=args.max_iter,
        resolution=args.resolution,
        discontinuous=args.discontinuous,
        log_interval=10,
        display_interval=args.display_interval,
        out_dir=out_dir,
        log_fn='log.txt',
        mp4save_interval=args.mp4save_interval,
        mp4save_fn='progress.mp4',
        use_opengl=args.opengl
    )

    # Done.
    print("Done.")

#----------------------------------------------------------------------------

if __name__ == "__main__":
    main()
205 |
206 | #----------------------------------------------------------------------------
207 |
--------------------------------------------------------------------------------
/samples/torch/earth.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | import argparse
10 | import os
11 | import pathlib
12 | import sys
13 | import numpy as np
14 | import torch
15 |
16 | import util
17 |
18 | import nvdiffrast.torch as dr
19 |
20 | #----------------------------------------------------------------------------
21 | # Helpers.
22 |
def transform_pos(mtx, pos):
    # Convert a NumPy matrix to a CUDA tensor; pass torch tensors through.
    if isinstance(mtx, np.ndarray):
        mtx_t = torch.from_numpy(mtx).cuda()
    else:
        mtx_t = mtx
    # Homogenize: (x,y,z) -> (x,y,z,1).
    w_col = torch.ones([pos.shape[0], 1]).cuda()
    pos_w = torch.cat([pos, w_col], axis=1)
    return torch.matmul(pos_w, mtx_t.t())[None, ...]
27 |
def render(glctx, mtx, pos, pos_idx, uv, uv_idx, tex, resolution, enable_mip, max_mip_level):
    # Rasterize, then sample the texture with or without mipmapping.
    pos_clip = transform_pos(mtx, pos)
    rast_out, rast_db = dr.rasterize(glctx, pos_clip, pos_idx, resolution=[resolution, resolution])

    if enable_mip:
        # Propagate screen-space derivatives so the lookup can pick mip levels.
        texc, texd = dr.interpolate(uv[None, ...], rast_out, uv_idx, rast_db=rast_db, diff_attrs='all')
        color = dr.texture(tex[None, ...], texc, texd, filter_mode='linear-mipmap-linear', max_mip_level=max_mip_level)
    else:
        texc, _ = dr.interpolate(uv[None, ...], rast_out, uv_idx)
        color = dr.texture(tex[None, ...], texc, filter_mode='linear')

    # Zero out pixels not covered by any triangle.
    return color * torch.clamp(rast_out[..., -1:], 0, 1)
41 |
42 | #----------------------------------------------------------------------------
43 |
def fit_earth(max_iter = 20000,
              log_interval = 10,
              display_interval = None,
              display_res = 1024,
              enable_mip = True,
              res = 512,
              ref_res = 2048, # Dropped from 4096 to 2048 to allow using the Cuda rasterizer.
              lr_base = 1e-2,
              lr_ramp = 0.1,
              out_dir = None,
              log_fn = None,
              texsave_interval = None,
              texsave_fn = None,
              imgsave_interval = None,
              imgsave_fn = None,
              use_opengl = False):
    """Fit a texture for the Earth mesh by matching mipmapped reference renders.

    The reference is rendered at ref_res with mipmapping always on and reduced
    to res by repeated bilinear downsampling; the optimized texture is rendered
    at res with mipmapping controlled by enable_mip. An L2 pixel loss drives
    Adam with an exponential learning-rate ramp from lr_base toward
    lr_base*lr_ramp.

    Args:
        max_iter:          Number of optimization iterations (max_iter+1 steps run).
        log_interval:      Iterations between log prints; falsy disables logging.
        display_interval:  Iterations between interactive displays; falsy disables.
        display_res:       Display window size in pixels.
        enable_mip:        Enable mipmapping in the optimized rendering path.
        res:               Optimization rendering resolution.
        ref_res:           Reference rendering resolution (reduced to res for the loss).
        lr_base:           Initial Adam learning rate.
        lr_ramp:           Final/initial learning-rate ratio of the exponential ramp.
        out_dir:           Output directory, or None to disable all saving.
        log_fn:            Log file name inside out_dir.
        texsave_interval:  Iterations between texture snapshots; falsy disables.
        texsave_fn:        Texture file name pattern containing a %-style iteration slot.
        imgsave_interval:  Iterations between image snapshots; falsy disables.
        imgsave_fn:        Image file name pattern containing a %-style iteration slot.
        use_opengl:        Use the OpenGL rasterizer instead of the CUDA one.
    """

    log_file = None
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
        if log_fn:
            log_file = open(out_dir + '/' + log_fn, 'wt')
    else:
        # No output directory: disable image/texture saving entirely.
        imgsave_interval, texsave_interval = None, None

    # Mesh and texture adapted from "3D Earth Photorealistic 2K" model at
    # https://www.turbosquid.com/3d-models/3d-realistic-earth-photorealistic-2k-1279125
    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    with np.load(f'{datadir}/earth.npz') as f:
        pos_idx, pos, uv_idx, uv, tex = f.values()
    tex = tex.astype(np.float32)/255.0
    max_mip_level = 9 # Texture is a 4x3 atlas of 512x512 maps.
    print("Mesh has %d triangles and %d vertices." % (pos_idx.shape[0], pos.shape[0]))

    # Some input geometry contains vertex positions in (N, 4) (with v[:,3]==1). Drop
    # the last column in that case.
    if pos.shape[1] == 4: pos = pos[:, 0:3]

    # Create position/triangle index tensors
    pos_idx = torch.from_numpy(pos_idx.astype(np.int32)).cuda()
    vtx_pos = torch.from_numpy(pos.astype(np.float32)).cuda()
    uv_idx = torch.from_numpy(uv_idx.astype(np.int32)).cuda()
    vtx_uv = torch.from_numpy(uv.astype(np.float32)).cuda()

    # Reference texture and the learned texture, initialized to flat gray.
    tex = torch.from_numpy(tex.astype(np.float32)).cuda()
    tex_opt = torch.full(tex.shape, 0.2, device='cuda', requires_grad=True)
    glctx = dr.RasterizeGLContext() if use_opengl else dr.RasterizeCudaContext()

    # Adam optimizer for texture with a learning rate ramp.
    optimizer = torch.optim.Adam([tex_opt], lr=lr_base)
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda x: lr_ramp**(float(x)/float(max_iter)))

    # Render.
    ang = 0.0
    texloss_avg = []
    for it in range(max_iter + 1):
        # Random rotation/translation matrix for optimization.
        r_rot = util.random_rotation_translation(0.25)

        # Smooth rotation for display.
        a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))
        dist = np.random.uniform(0.0, 48.5)

        # Modelview and modelview + projection matrices.
        proj = util.projection(x=0.4, n=1.0, f=200.0)
        r_mv = np.matmul(util.translate(0, 0, -1.5-dist), r_rot)
        r_mvp = np.matmul(proj, r_mv).astype(np.float32)
        a_mv = np.matmul(util.translate(0, 0, -3.5), a_rot)
        a_mvp = np.matmul(proj, a_mv).astype(np.float32)

        # Measure texture-space RMSE loss
        with torch.no_grad():
            texmask = torch.zeros_like(tex)
            tr = tex.shape[1]//4
            texmask[tr+13:2*tr-13, 25:-25, :] += 1.0
            texmask[25:-25, tr+13:2*tr-13, :] += 1.0
            # Measure only relevant portions of texture when calculating texture
            # PSNR.
            texloss = (torch.sum(texmask * (tex - tex_opt)**2)/torch.sum(texmask))**0.5 # RMSE within masked area.
            texloss_avg.append(float(texloss))

        # Render reference and optimized frames. Always enable mipmapping for reference.
        color = render(glctx, r_mvp, vtx_pos, pos_idx, vtx_uv, uv_idx, tex, ref_res, True, max_mip_level)
        color_opt = render(glctx, r_mvp, vtx_pos, pos_idx, vtx_uv, uv_idx, tex_opt, res, enable_mip, max_mip_level)

        # Reduce the reference to correct size.
        while color.shape[1] > res:
            color = util.bilinear_downsample(color)

        # Compute loss and perform a training step.
        loss = torch.mean((color - color_opt)**2) # L2 pixel loss.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        # Print/save log.
        if log_interval and (it % log_interval == 0):
            texloss_val = np.mean(np.asarray(texloss_avg))
            texloss_avg = []
            psnr = -10.0 * np.log10(texloss_val**2) # PSNR based on average RMSE.
            s = "iter=%d,loss=%f,psnr=%f" % (it, texloss_val, psnr)
            print(s)
            if log_file:
                log_file.write(s + '\n')

        # Show/save image.
        display_image = display_interval and (it % display_interval == 0)
        save_image = imgsave_interval and (it % imgsave_interval == 0)
        save_texture = texsave_interval and (it % texsave_interval) == 0

        if display_image or save_image:
            ang = ang + 0.1

            with torch.no_grad():
                result_image = render(glctx, a_mvp, vtx_pos, pos_idx, vtx_uv, uv_idx, tex_opt, res, enable_mip, max_mip_level)[0].cpu().numpy()[::-1]

            if display_image:
                util.display_image(result_image, size=display_res, title='%d / %d' % (it, max_iter))
            if save_image:
                util.save_image(out_dir + '/' + (imgsave_fn % it), result_image)

            if save_texture:
                # Detach before conversion: tex_opt requires grad, and calling
                # .numpy() on a grad-requiring tensor raises a RuntimeError.
                texture = tex_opt.detach().cpu().numpy()[::-1]
                util.save_image(out_dir + '/' + (texsave_fn % it), texture)

    # Done.
    if log_file:
        log_file.close()
176 |
177 | #----------------------------------------------------------------------------
178 |
def main():
    """Parse command-line options and launch the Earth texture fitting."""
    parser = argparse.ArgumentParser(description='Earth texture fitting example')
    parser.add_argument('--opengl', help='enable OpenGL rendering', action='store_true', default=False)
    parser.add_argument('--outdir', help='specify output directory', default='')
    parser.add_argument('--mip', help='enable mipmapping', action='store_true', default=False)
    parser.add_argument('--display-interval', type=int, default=0)
    parser.add_argument('--max-iter', type=int, default=10000)
    args = parser.parse_args()

    # Derive the output directory name from the mipmapping setting.
    if not args.outdir:
        out_dir = None
        print('No output directory specified, not saving log or images')
    else:
        ms = 'mip' if args.mip else 'nomip'
        out_dir = f'{args.outdir}/earth_{ms}'
        print(f'Saving results under {out_dir}')

    # Run the optimization.
    fit_earth(max_iter=args.max_iter,
              log_interval=10,
              display_interval=args.display_interval,
              enable_mip=args.mip,
              out_dir=out_dir,
              log_fn='log.txt',
              texsave_interval=1000,
              texsave_fn='tex_%06d.png',
              imgsave_interval=1000,
              imgsave_fn='img_%06d.png',
              use_opengl=args.opengl)

    # Done.
    print("Done.")

#----------------------------------------------------------------------------

if __name__ == "__main__":
    main()
207 |
208 | #----------------------------------------------------------------------------
209 |
--------------------------------------------------------------------------------
/samples/torch/envphong.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | import argparse
10 | import numpy as np
11 | import torch
12 | import os
13 | import sys
14 | import pathlib
15 | import imageio
16 |
17 | import util
18 |
19 | import nvdiffrast.torch as dr
20 |
21 | #----------------------------------------------------------------------------
22 | # Environment map and Phong BRDF learning.
23 | #----------------------------------------------------------------------------
24 |
def fit_env_phong(max_iter = 1000,
                  log_interval = 10,
                  display_interval = None,
                  display_res = 1024,
                  res = 1024,
                  lr_base = 1e-2,
                  lr_ramp = 1.0,
                  out_dir = None,
                  log_fn = None,
                  mp4save_interval = None,
                  mp4save_fn = None,
                  use_opengl = False):
    """Jointly optimize a cube environment map and Phong BRDF parameters.

    A reference renderer uses the known environment map and Phong constants;
    a candidate renderer shares the identical geometry/reflection computation
    but uses learned variables (env_var, phong_var_raw). Only shading
    parameters are fit, so no antialiasing is needed.

    Args:
        max_iter:          Number of optimization iterations (max_iter+1 steps run).
        log_interval:      Iterations between log prints; falsy disables logging.
        display_interval:  Iterations between interactive displays; falsy disables.
        display_res:       Display window size in pixels.
        res:               Rendering resolution.
        lr_base:           Initial Adam learning rate.
        lr_ramp:           Final/initial learning-rate ratio of the exponential ramp.
        out_dir:           Output directory, or None to disable all saving.
        log_fn:            Log file name inside out_dir.
        mp4save_interval:  Iterations between video frames; falsy disables.
        mp4save_fn:        Video file name inside out_dir.
        use_opengl:        Use the OpenGL rasterizer instead of the CUDA one.
    """

    log_file = None
    writer = None
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
        if log_fn:
            log_file = open(out_dir + '/' + log_fn, 'wt')
        if mp4save_interval != 0:
            writer = imageio.get_writer(f'{out_dir}/{mp4save_fn}', mode='I', fps=30, codec='libx264', bitrate='16M')
    else:
        # No output directory: disable video saving.
        mp4save_interval = None

    # Texture adapted from https://github.com/WaveEngine/Samples/tree/master/Materials/EnvironmentMap/Content/Assets/CubeMap.cubemap
    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    with np.load(f'{datadir}/envphong.npz') as f:
        pos_idx, pos, normals, env = f.values()
    env = env.astype(np.float32)/255.0
    # Flip each cube face vertically; .copy() makes the negative-stride view contiguous.
    env = np.stack(env)[:, ::-1].copy()
    print("Mesh has %d triangles and %d vertices." % (pos_idx.shape[0], pos.shape[0]))

    # Move all the stuff to GPU.
    pos_idx = torch.as_tensor(pos_idx, dtype=torch.int32, device='cuda')
    pos = torch.as_tensor(pos, dtype=torch.float32, device='cuda')
    normals = torch.as_tensor(normals, dtype=torch.float32, device='cuda')
    env = torch.as_tensor(env, dtype=torch.float32, device='cuda')

    # Target Phong parameters.
    phong_rgb = np.asarray([1.0, 0.8, 0.6], np.float32)
    phong_exp = 25.0
    phong_rgb_t = torch.as_tensor(phong_rgb, dtype=torch.float32, device='cuda')

    # Learned variables: environment maps, phong color, phong exponent.
    env_var = torch.ones_like(env) * .5
    env_var.requires_grad_()
    phong_var_raw = torch.as_tensor(np.random.uniform(size=[4]), dtype=torch.float32, device='cuda')
    phong_var_raw.requires_grad_()
    # Scale so the raw exponent variable optimizes on a similar range as the colors.
    phong_var_mul = torch.as_tensor([1.0, 1.0, 1.0, 10.0], dtype=torch.float32, device='cuda')

    # Render.
    ang = 0.0
    imgloss_avg, phong_avg = [], []
    glctx = dr.RasterizeGLContext() if use_opengl else dr.RasterizeCudaContext()
    zero_tensor = torch.as_tensor(0.0, dtype=torch.float32, device='cuda')
    one_tensor = torch.as_tensor(1.0, dtype=torch.float32, device='cuda')

    # Adam optimizer for environment map and phong with a learning rate ramp.
    optimizer = torch.optim.Adam([env_var, phong_var_raw], lr=lr_base)
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda x: lr_ramp**(float(x)/float(max_iter)))

    for it in range(max_iter + 1):
        phong_var = phong_var_raw * phong_var_mul

        # Random rotation/translation matrix for optimization.
        r_rot = util.random_rotation_translation(0.25)

        # Smooth rotation for display.
        ang = ang + 0.01
        a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))

        # Modelview and modelview + projection matrices.
        proj = util.projection(x=0.4, n=1.0, f=200.0)
        r_mv = np.matmul(util.translate(0, 0, -3.5), r_rot)
        r_mvp = np.matmul(proj, r_mv).astype(np.float32)
        a_mv = np.matmul(util.translate(0, 0, -3.5), a_rot)
        a_mvp = np.matmul(proj, a_mv).astype(np.float32)
        # Keep a numpy copy of the display mvp for transforming the light below.
        a_mvc = a_mvp
        r_mvp = torch.as_tensor(r_mvp, dtype=torch.float32, device='cuda')
        a_mvp = torch.as_tensor(a_mvp, dtype=torch.float32, device='cuda')

        # Solve camera positions.
        a_campos = torch.as_tensor(np.linalg.inv(a_mv)[:3, 3], dtype=torch.float32, device='cuda')
        r_campos = torch.as_tensor(np.linalg.inv(r_mv)[:3, 3], dtype=torch.float32, device='cuda')

        # Random light direction.
        lightdir = np.random.normal(size=[3])
        lightdir /= np.linalg.norm(lightdir) + 1e-8
        lightdir = torch.as_tensor(lightdir, dtype=torch.float32, device='cuda')

        def render_refl(ldir, cpos, mvp):
            """Rasterize the mesh and return per-pixel reflection vectors, their
            screen-space derivatives, the L.R term, and a background mask."""
            # Transform and rasterize.
            viewvec = pos[..., :3] - cpos[np.newaxis, np.newaxis, :] # View vectors at vertices.
            reflvec = viewvec - 2.0 * normals[np.newaxis, ...] * torch.sum(normals[np.newaxis, ...] * viewvec, -1, keepdim=True) # Reflection vectors at vertices.
            reflvec = reflvec / torch.sum(reflvec**2, -1, keepdim=True)**0.5 # Normalize.
            pos_clip = torch.matmul(pos, mvp.t())[np.newaxis, ...]
            rast_out, rast_out_db = dr.rasterize(glctx, pos_clip, pos_idx, [res, res])
            refl, refld = dr.interpolate(reflvec, rast_out, pos_idx, rast_db=rast_out_db, diff_attrs='all') # Interpolated reflection vectors.

            # Phong light.
            refl = refl / (torch.sum(refl**2, -1, keepdim=True) + 1e-8)**0.5  # Normalize.
            ldotr = torch.sum(-ldir * refl, -1, keepdim=True) # L dot R.

            # Return
            return refl, refld, ldotr, (rast_out[..., -1:] == 0)

        # Render the reflections.
        refl, refld, ldotr, mask = render_refl(lightdir, r_campos, r_mvp)

        # Reference color. No need for AA because we are not learning geometry.
        color = dr.texture(env[np.newaxis, ...], refl, uv_da=refld, filter_mode='linear-mipmap-linear', boundary_mode='cube')
        color = color + phong_rgb_t * torch.max(zero_tensor, ldotr) ** phong_exp # Phong.
        color = torch.where(mask, one_tensor, color) # White background.

        # Candidate rendering same up to this point, but uses learned texture and Phong parameters instead.
        color_opt = dr.texture(env_var[np.newaxis, ...], refl, uv_da=refld, filter_mode='linear-mipmap-linear', boundary_mode='cube')
        color_opt = color_opt + phong_var[:3] * torch.max(zero_tensor, ldotr) ** phong_var[3] # Phong.
        color_opt = torch.where(mask, one_tensor, color_opt) # White background.

        # Compute loss and train.
        loss = torch.mean((color - color_opt)**2) # L2 pixel loss.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        # Collect losses.
        imgloss_avg.append(loss.detach().cpu().numpy())
        phong_avg.append(phong_var.detach().cpu().numpy())

        # Print/save log.
        if log_interval and (it % log_interval == 0):
            imgloss_val, imgloss_avg = np.mean(np.asarray(imgloss_avg, np.float32)), []
            phong_val, phong_avg = np.mean(np.asarray(phong_avg, np.float32), axis=0), []
            phong_rgb_rmse = np.mean((phong_val[:3] - phong_rgb)**2)**0.5
            phong_exp_rel_err = np.abs(phong_val[3] - phong_exp)/phong_exp
            s = "iter=%d,phong_rgb_rmse=%f,phong_exp_rel_err=%f,img_rmse=%f" % (it, phong_rgb_rmse, phong_exp_rel_err, imgloss_val)
            print(s)
            if log_file:
                log_file.write(s + '\n')

        # Show/save result image.
        display_image = display_interval and (it % display_interval == 0)
        save_mp4 = mp4save_interval and (it % mp4save_interval == 0)

        if display_image or save_mp4:
            # Fixed light direction for the display render, rotated into view space.
            lightdir = np.asarray([.8, -1., .5, 0.0])
            lightdir = np.matmul(a_mvc, lightdir)[:3]
            lightdir /= np.linalg.norm(lightdir)
            lightdir = torch.as_tensor(lightdir, dtype=torch.float32, device='cuda')
            refl, refld, ldotr, mask = render_refl(lightdir, a_campos, a_mvp)
            color_opt = dr.texture(env_var[np.newaxis, ...], refl, uv_da=refld, filter_mode='linear-mipmap-linear', boundary_mode='cube')
            color_opt = color_opt + phong_var[:3] * torch.max(zero_tensor, ldotr) ** phong_var[3]
            color_opt = torch.where(mask, one_tensor, color_opt)
            result_image = color_opt.detach()[0].cpu().numpy()[::-1]
            if display_image:
                util.display_image(result_image, size=display_res, title='%d / %d' % (it, max_iter))
            if save_mp4:
                writer.append_data(np.clip(np.rint(result_image*255.0), 0, 255).astype(np.uint8))

    # Done.
    if writer is not None:
        writer.close()
    if log_file:
        log_file.close()
190 |
191 | #----------------------------------------------------------------------------
192 | # Main function.
193 | #----------------------------------------------------------------------------
194 |
def main():
    """Parse command-line options and launch the environment map fitting."""
    parser = argparse.ArgumentParser(description='Environment map fitting example')
    parser.add_argument('--opengl', help='enable OpenGL rendering', action='store_true', default=False)
    parser.add_argument('--outdir', help='specify output directory', default='')
    parser.add_argument('--display-interval', type=int, default=0)
    parser.add_argument('--mp4save-interval', type=int, default=10)
    parser.add_argument('--max-iter', type=int, default=5000)
    args = parser.parse_args()

    # Decide where (and whether) results are written.
    if not args.outdir:
        out_dir = None
        print('No output directory specified, not saving log or images')
    else:
        out_dir = f'{args.outdir}/env_phong'
        print(f'Saving results under {out_dir}')

    # Kick off the optimization loop.
    fit_env_phong(max_iter=args.max_iter,
                  log_interval=100,
                  display_interval=args.display_interval,
                  out_dir=out_dir,
                  mp4save_interval=args.mp4save_interval,
                  mp4save_fn='progress.mp4',
                  use_opengl=args.opengl)

    # Done.
    print("Done.")

#----------------------------------------------------------------------------

if __name__ == "__main__":
    main()
230 |
231 | #----------------------------------------------------------------------------
232 |
--------------------------------------------------------------------------------
/samples/torch/triangle.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | import imageio
10 | import numpy as np
11 | import torch
12 | import nvdiffrast.torch as dr
13 | import sys
14 |
def tensor(*args, **kwargs):
    """Convenience wrapper: construct a torch.Tensor on the CUDA device."""
    return torch.tensor(*args, device='cuda', **kwargs)
17 |
# Select the rasterizer backend from the sole command-line argument.
if sys.argv[1:] == ['--cuda']:
    glctx = dr.RasterizeCudaContext()
elif sys.argv[1:] == ['--opengl']:
    glctx = dr.RasterizeGLContext()
else:
    print("Specify either --cuda or --opengl")
    exit(1)

# One triangle: clip-space vertex positions (x, y, z, w), per-vertex RGB
# colors, and the index triplet connecting them. Leading axis is the batch.
pos = tensor([[[-0.8, -0.8, 0, 1], [0.8, -0.8, 0, 1], [-0.8, 0.8, 0, 1]]], dtype=torch.float32)
col = tensor([[[1, 0, 0], [0, 1, 0], [0, 0, 1]]], dtype=torch.float32)
tri = tensor([[0, 1, 2]], dtype=torch.int32)

# Rasterize at 256x256, then interpolate the vertex colors over the coverage.
rast, _ = dr.rasterize(glctx, pos, tri, resolution=[256, 256])
out, _ = dr.interpolate(col, rast, tri)

img = out.cpu().numpy()[0, ::-1, :, :] # Flip vertically.
img = np.clip(np.rint(img * 255), 0, 255).astype(np.uint8) # Quantize to np.uint8

print("Saving to 'tri.png'.")
imageio.imsave('tri.png', img)
38 |
--------------------------------------------------------------------------------
/samples/torch/util.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | import numpy as np
10 | import torch
11 |
12 | #----------------------------------------------------------------------------
13 | # Projection and transformation matrix helpers.
14 | #----------------------------------------------------------------------------
15 |
def projection(x=0.1, n=1.0, f=50.0):
    """Build a 4x4 OpenGL-style perspective projection matrix (float32).

    x is the half-extent of the near plane (controls field of view);
    n and f are the near and far clip distances.
    """
    a = -(f + n) / (f - n)
    b = -(2 * f * n) / (f - n)
    m = np.zeros((4, 4), dtype=np.float32)
    m[0, 0] = n / x
    m[1, 1] = n / x
    m[2, 2] = a
    m[2, 3] = b
    m[3, 2] = -1
    return m
21 |
def translate(x, y, z):
    """Return a 4x4 float32 translation matrix for offset (x, y, z)."""
    m = np.eye(4, dtype=np.float32)
    m[0, 3] = x
    m[1, 3] = y
    m[2, 3] = z
    return m
27 |
def rotate_x(a):
    """Return a 4x4 float32 rotation matrix about the x axis by angle a (radians)."""
    c = np.cos(a)
    s = np.sin(a)
    m = np.eye(4, dtype=np.float32)
    m[1, 1], m[1, 2] = c, s
    m[2, 1], m[2, 2] = -s, c
    return m
34 |
def rotate_y(a):
    """Return a 4x4 float32 rotation matrix about the y axis by angle a (radians)."""
    c = np.cos(a)
    s = np.sin(a)
    m = np.eye(4, dtype=np.float32)
    m[0, 0], m[0, 2] = c, s
    m[2, 0], m[2, 2] = -s, c
    return m
41 |
def random_rotation_translation(t):
    """Sample a random 4x4 rigid transform.

    The rotation is built by orthogonalizing random Gaussian rows via two
    cross products; the translation is uniform in [-t, t]^3.
    """
    basis = np.random.normal(size=[3, 3])
    basis[1] = np.cross(basis[0], basis[2])  # make row 1 orthogonal to rows 0 and 2
    basis[2] = np.cross(basis[0], basis[1])  # re-derive row 2 -> all rows mutually orthogonal
    basis = basis / np.linalg.norm(basis, axis=1, keepdims=True)
    out = np.pad(basis, [[0, 1], [0, 1]], mode='constant')
    out[3, 3] = 1.0
    out[:3, 3] = np.random.uniform(-t, t, size=[3])
    return out
51 |
52 | #----------------------------------------------------------------------------
53 | # Bilinear downsample by 2x.
54 | #----------------------------------------------------------------------------
55 |
def bilinear_downsample(x):
    """Downsample an NHWC tensor by 2x with a 4x4 separable bilinear kernel."""
    k1d = torch.tensor([1.0, 3.0, 3.0, 1.0], dtype=torch.float32, device=x.device)
    kernel = (k1d[:, None] * k1d[None, :]) / 64.0  # outer product -> normalized 4x4 tap weights
    channels = x.shape[-1]
    # Depthwise convolution: one copy of the kernel per channel.
    kernel = kernel.expand(channels, 1, 4, 4)
    y = torch.nn.functional.conv2d(x.permute(0, 3, 1, 2), kernel, padding=1, stride=2, groups=channels)
    return y.permute(0, 2, 3, 1)
61 |
62 | #----------------------------------------------------------------------------
63 | # Image display function using OpenGL.
64 | #----------------------------------------------------------------------------
65 |
# Cached glfw window handle; created lazily on first call and reused afterwards.
_glfw_window = None
def display_image(image, zoom=None, size=None, title=None): # HWC
    """Show an HWC image (uint8 or float32, 1-3 channels) in an OpenGL window.

    At most one of zoom (integer magnification) and size (target window size
    in pixels) may be given. Returns False once the user has requested the
    window to close, True otherwise.
    """
    # Import OpenGL and glfw.
    import OpenGL.GL as gl
    import glfw

    # Zoom image if requested.
    image = np.asarray(image)
    if size is not None:
        assert zoom is None
        zoom = max(1, size // image.shape[0])
    if zoom is not None:
        # Nearest-neighbor magnification by repeating rows and columns.
        image = image.repeat(zoom, axis=0).repeat(zoom, axis=1)
    height, width, channels = image.shape

    # Initialize window.
    if title is None:
        title = 'Debug window'
    global _glfw_window
    if _glfw_window is None:
        glfw.init()
        _glfw_window = glfw.create_window(width, height, title, None, None)
        glfw.make_context_current(_glfw_window)
        glfw.show_window(_glfw_window)
        glfw.swap_interval(0)
    else:
        # Reuse the existing window; just retitle and resize it.
        glfw.make_context_current(_glfw_window)
        glfw.set_window_title(_glfw_window, title)
        glfw.set_window_size(_glfw_window, width, height)

    # Update window.
    glfw.poll_events()
    gl.glClearColor(0, 0, 0, 1)
    gl.glClear(gl.GL_COLOR_BUFFER_BIT)
    gl.glWindowPos2f(0, 0)
    gl.glPixelStorei(gl.GL_UNPACK_ALIGNMENT, 1)
    # Map channel count and numpy dtype to the matching OpenGL pixel format enums.
    gl_format = {3: gl.GL_RGB, 2: gl.GL_RG, 1: gl.GL_LUMINANCE}[channels]
    gl_dtype = {'uint8': gl.GL_UNSIGNED_BYTE, 'float32': gl.GL_FLOAT}[image.dtype.name]
    # Flip vertically: glDrawPixels expects the bottom row first.
    gl.glDrawPixels(width, height, gl_format, gl_dtype, image[::-1])
    glfw.swap_buffers(_glfw_window)
    if glfw.window_should_close(_glfw_window):
        return False
    return True
109 |
110 | #----------------------------------------------------------------------------
111 | # Image save helper.
112 | #----------------------------------------------------------------------------
113 |
def save_image(fn, x):
    """Quantize a float image in [0, 1] to uint8 and write it to file fn."""
    import imageio
    quantized = np.clip(np.rint(x * 255.0), 0, 255).astype(np.uint8)
    imageio.imsave(fn, quantized)
119 |
120 | #----------------------------------------------------------------------------
121 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | import nvdiffrast
10 | import setuptools
11 | import os
12 |
# The long description shown on PyPI comes straight from the repository README.
with open("README.md", "r") as fh:
    long_description = fh.read()

setuptools.setup(
    name="nvdiffrast",
    version=nvdiffrast.__version__,  # single-sourced from the package itself
    author="Samuli Laine",
    author_email="slaine@nvidia.com",
    description="nvdiffrast - modular primitives for high-performance differentiable rendering",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/NVlabs/nvdiffrast",
    packages=setuptools.find_packages(),
    # Ship the CUDA/C++ sources inside the wheel so the native extension can
    # be JIT-compiled on the user's machine at import time.
    package_data={
        'nvdiffrast': [
            'common/*.h',
            'common/*.inl',
            'common/*.cu',
            'common/*.cpp',
            'common/cudaraster/*.hpp',
            'common/cudaraster/impl/*.cpp',
            'common/cudaraster/impl/*.hpp',
            'common/cudaraster/impl/*.inl',
            'common/cudaraster/impl/*.cu',
            'lib/*.h',
            'torch/*.h',
            'torch/*.inl',
            'torch/*.cpp',
            'tensorflow/*.cu',
        ] + (['lib/*.lib'] if os.name == 'nt' else [])  # setgpu.lib is Windows-only
    },
    include_package_data=True,
    install_requires=['numpy'], # note: can't require torch here as it will install torch even for a TensorFlow container
    classifiers=[
        "Programming Language :: Python :: 3",
        "Operating System :: OS Independent",
    ],
    python_requires='>=3.6',
)
52 |
--------------------------------------------------------------------------------