├── .gitattributes
├── .gitignore
├── LICENSE
├── README.md
├── devsetup.bat
├── docs
    └── images
    │   └── grr_main.png
├── grr
    ├── Threading.hlsl
    ├── __init__.py
    ├── __main__.py
    ├── camera.py
    ├── clear_target_cs.hlsl
    ├── coverage.hlsl
    ├── coverage_lut_tool.hlsl
    ├── coverage_lut_tool.py
    ├── data
    │   ├── debug_font.jpg
    │   └── default-scenes
    │   │   ├── cube
    │   │       └── cube.obj
    │   │   ├── dragon
    │   │       └── dragon.obj
    │   │   ├── sponza
    │   │       ├── copyright.txt
    │   │       ├── sponza.mtl
    │   │       ├── sponza.obj
    │   │       └── textures
    │   │       │   ├── background.png
    │   │       │   ├── background_bump.png
    │   │       │   ├── chain_texture.png
    │   │       │   ├── chain_texture_bump.png
    │   │       │   ├── chain_texture_mask.png
    │   │       │   ├── floor_gloss.png
    │   │       │   ├── lion.png
    │   │       │   ├── lion2_bump.png
    │   │       │   ├── lion_bump.png
    │   │       │   ├── spnza_bricks_a_bump.png
    │   │       │   ├── spnza_bricks_a_diff.png
    │   │       │   ├── spnza_bricks_a_spec.png
    │   │       │   ├── sponza_arch_bump.png
    │   │       │   ├── sponza_arch_diff.png
    │   │       │   ├── sponza_arch_spec.png
    │   │       │   ├── sponza_ceiling_a_diff.png
    │   │       │   ├── sponza_ceiling_a_spec.png
    │   │       │   ├── sponza_column_a_bump.png
    │   │       │   ├── sponza_column_a_diff.png
    │   │       │   ├── sponza_column_a_spec.png
    │   │       │   ├── sponza_column_b_bump.png
    │   │       │   ├── sponza_column_b_diff.png
    │   │       │   ├── sponza_column_b_spec.png
    │   │       │   ├── sponza_column_c_bump.png
    │   │       │   ├── sponza_column_c_diff.png
    │   │       │   ├── sponza_column_c_spec.png
    │   │       │   ├── sponza_curtain_blue_diff.png
    │   │       │   ├── sponza_curtain_diff.png
    │   │       │   ├── sponza_curtain_green_diff.png
    │   │       │   ├── sponza_details_diff.png
    │   │       │   ├── sponza_details_spec.png
    │   │       │   ├── sponza_fabric_blue_diff.png
    │   │       │   ├── sponza_fabric_diff.png
    │   │       │   ├── sponza_fabric_green_diff.png
    │   │       │   ├── sponza_fabric_purple.png
    │   │       │   ├── sponza_fabric_spec.png
    │   │       │   ├── sponza_flagpole_diff.png
    │   │       │   ├── sponza_flagpole_spec.png
    │   │       │   ├── sponza_floor_a_diff.png
    │   │       │   ├── sponza_floor_a_spec.png
    │   │       │   ├── sponza_roof_diff.png
    │   │       │   ├── sponza_thorn_bump.png
    │   │       │   ├── sponza_thorn_diff.png
    │   │       │   ├── sponza_thorn_mask.png
    │   │       │   ├── sponza_thorn_spec.png
    │   │       │   ├── vase_bump.png
    │   │       │   ├── vase_dif.png
    │   │       │   ├── vase_hanging.png
    │   │       │   ├── vase_plant.png
    │   │       │   ├── vase_plant_mask.png
    │   │       │   ├── vase_plant_spec.png
    │   │       │   ├── vase_round.png
    │   │       │   ├── vase_round_bump.png
    │   │       │   └── vase_round_spec.png
    │   │   └── teapot
    │   │       └── teapot.obj
    ├── debug_font.hlsl
    ├── debug_font.py
    ├── default_scenes.py
    ├── depth_utils.hlsl
    ├── editor.py
    ├── geometry.hlsl
    ├── gpugeo.py
    ├── overlay.py
    ├── overlay_cs.hlsl
    ├── prefix_sum.py
    ├── prefix_sum_cs.hlsl
    ├── profiler.py
    ├── raster.py
    ├── raster_cs.hlsl
    ├── raster_util.hlsl
    ├── test_bench.py
    ├── transform.py
    ├── utilities.py
    └── vec.py
└── requirements.txt


/.gitattributes:
--------------------------------------------------------------------------------
1 | *.obj filter=lfs diff=lfs merge=lfs -text
2 | *.mtl filter=lfs diff=lfs merge=lfs -text
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .tundra*
 2 | t2-output
 3 | *.swp
 4 | imgui.ini
 5 | editor_state.json
 6 | __pycache__
 7 | .vscode
 8 | tags
 9 | .shader_pdb
10 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Kleber Garcia
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # GRR
 2 | 
 3 | ```
 4 |                         +--------------------------------+
 5 |                         {  ____________________________  }
 6 |                         { /  _____/\______   \______   \ }
 7 |                         {/   \  ___ |       _/|       _/ }
 8 |                         {\    \_\  \|    |   \|    |   \ }
 9 |                         { \______  /|____|_  /|____|_  / }
10 |                         {        \/        \/        \/  }
11 |                         +--------------------------------+
12 |                         {  Gpu Renderer and Rasterizer   }
13 |                         {  Kleber Garcia (c) 2021        }
14 |                         {  v 0.1                         }
15 |                         +--------------------------------+
16 | ```
17 | 
18 | ![grrmain](docs/images/grr_main.png?raw=true)
19 | 
20 | GRR is a compute-based GPU rasterizer and renderer built on top of [coalpy](https://github.com/kecho/coalpy).
21 | 
22 | GRR is written entirely in compute HLSL and Python. Before running it, make sure its dependencies are set up (or install them manually). The `devsetup.bat` script sets the environment variables needed to use a local coalpy development build.
23 | 
24 | 
25 | 


--------------------------------------------------------------------------------
/devsetup.bat:
--------------------------------------------------------------------------------
@echo off
rem Run this batch file to set windows environment variables for coalpy.
rem The leading @ keeps the "echo off" command itself from being printed.
rem NOTE: this overwrites any PYTHONPATH already set in the current shell.
echo Adding Coalpy dev debug directory
set PYTHONPATH=..\coalpy\t2-output\win64-msvc-debug-default\
5 | 


--------------------------------------------------------------------------------
/docs/images/grr_main.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kecho/grr/bc94fc6f001074f5b692a6d06e6d18d22bae172f/docs/images/grr_main.png


--------------------------------------------------------------------------------
/grr/Threading.hlsl:
--------------------------------------------------------------------------------
 1 | #ifndef __THREADING_H__
 2 | #define __THREADING_H__
 3 | 
 4 | #ifndef GroupSize
 5 | #error "Must define a group size"
 6 | #endif
 7 | 
 8 | namespace Threading
 9 | {
10 | 
11 | groupshared uint gs_groupCache[GroupSize];
12 | 
13 | struct Group
14 | {
15 |     uint m_threadID;
16 |     void init(uint groupThreadIndex)
17 |     {
18 |         m_threadID = groupThreadIndex;
19 |     }
20 | 
21 |     void prefixExclusive(uint value, out uint sum, out uint count)
22 |     {
23 |         gs_groupCache[m_threadID] = value;
24 | 
25 |         GroupMemoryBarrierWithGroupSync();
26 | 
27 |         for (uint i = 1; i < GroupSize; i <<= 1)
28 |         {
29 |             uint prevValue = m_threadID >= i ? gs_groupCache[m_threadID - i] : 0;
30 | 
31 |             GroupMemoryBarrierWithGroupSync();
32 |     
33 |             gs_groupCache[m_threadID] += prevValue;
34 | 
35 |             GroupMemoryBarrierWithGroupSync();
36 |         }
37 | 
38 |         GroupMemoryBarrierWithGroupSync();
39 | 
40 |         sum = gs_groupCache[m_threadID] - value;
41 |         count = gs_groupCache[GroupSize - 1]; 
42 |     }
43 | };
44 | 
45 | 
46 | }
47 | 
48 | #endif
49 | 


--------------------------------------------------------------------------------
/grr/__init__.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | import pathlib
 4 | import coalpy.gpu as g
 5 | 
 6 | 
 7 | print ("graphics devices:")
 8 | [print("{}: {}".format(idx, nm)) for (idx, nm) in g.get_adapters()]
 9 | 
def _checkGpu(gpuInfo, substring):
    """Return True when the adapter name contains *substring*.

    gpuInfo is an (index, name) pair. The name is lower-cased before the
    comparison, so *substring* should be given in lower case.
    """
    _, adapter_name = gpuInfo
    lowered_name = adapter_name.lower()
    return substring in lowered_name
13 | 
# If an nvidia or amd gpu is present, select the first one found.
# Otherwise the adapter index in the coalpy settings is left untouched.
print ("GRR - GPU Render And Rasterizer")
selected_gpu = next((adapter for adapter in g.get_adapters() if _checkGpu(adapter, "nvidia") or _checkGpu(adapter, "amd")), None)
if selected_gpu is not None:
    print ("Setting gpu %d" % selected_gpu[0] )
    g.get_settings().adapter_index = selected_gpu[0]

#g.get_settings().spirv_debug_reflection = True
g.get_settings().enable_debug_device = False
g.get_settings().graphics_api = "dx12"


# Directory containing this package, with a trailing path separator.
# os.path.join(..., "") appends the platform's own separator instead of a
# hard-coded backslash; on Windows the result is unchanged.
g_module_path = os.path.join(os.path.dirname(pathlib.Path(sys.modules[__name__].__file__)), "")
g.add_data_path(g_module_path)
28 | 
def get_module_path():
    """Return the package directory path that was registered as a coalpy data path."""
    return g_module_path
31 | 


--------------------------------------------------------------------------------
/grr/__main__.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import coalpy.gpu as g
 3 | 
 4 | from . import editor
 5 | from . import gpugeo
 6 | from . import utilities
 7 | from . import raster
 8 | from . import overlay
 9 | 
info = g.get_current_adapter_info()
# Raw string: the ASCII-art logo is full of backslashes that are not valid
# escape sequences ("\_", "\ ", "\/", "\|"). In a normal string literal they
# trigger an invalid-escape warning; as a raw string the text is identical.
print(r"""
+--------------------------------+
{  ____________________________  } 
{ /  _____/\______   \______   \ }   
{/   \  ___ |       _/|       _/ } 
{\    \_\  \|    |   \|    |   \ } 
{ \______  /|____|_  /|____|_  / } 
{        \/        \/        \/  } 
+--------------------------------+
{  Gpu Renderer and Rasterizer   } 
{  Kleber Garcia (c) 2021        }
{  v 0.1                         }
+--------------------------------+
""")
print("device: {}".format(info[1]))

# Initial window size; also used to size the rasterizer.
initial_w = 1600
initial_h = 900

# Objects shared with on_render for the lifetime of the application.
geo = gpugeo.GpuGeo()
rasterizer = raster.Rasterizer(initial_w, initial_h)
active_editor = editor.Editor(geo, None)
active_editor.load_editor_state()
32 | 
def on_render(render_args : g.RenderArgs):
    """Per-frame callback passed to the coalpy window.

    Builds the editor UI, then for every editor viewport with a non-zero size
    and a texture: updates the viewport, clears the visibility buffer,
    rasterizes the geometry, renders the overlay and schedules the command list.

    Returns False when the window has zero width or height, otherwise None.
    """
    if render_args.width == 0 or render_args.height == 0:
        return False

    active_editor.build_ui(render_args.imgui, render_args.implot)
    viewports = active_editor.viewports
    active_editor.profiler.begin_capture()
    active_editor.render_tools()
    for vp in viewports:
        vp_w = vp.width
        vp_h = vp.height
        # Nothing to draw into: skip before allocating a command list.
        if vp_w == 0 or vp_h == 0 or vp.texture is None:
            continue

        cmd_list = g.CommandList()

        vp.update(render_args.delta_time)

        utilities.clear_texture(
            cmd_list, [0.0, 0.0, 0.0, 0.0],
            rasterizer.visibility_buffer, vp_w, vp_h)

        rasterizer.rasterize(
            cmd_list,
            vp_w, vp_h,
            vp.camera.view_matrix,
            vp.camera.proj_matrix,
            geo,
            vp)

        overlay.render_overlay(
            cmd_list,
            rasterizer, vp.texture, vp)

        g.schedule(cmd_list)
    active_editor.profiler.end_capture()

    return
71 | 
# Create the main window; on_render is registered as its render callback.
w = g.Window(
    title = "GRR - gpu rasterizer and renderer for python. Kleber Garcia, 2021",
    width = initial_w,
    height = initial_h,
    on_render = on_render)

# Run coalpy's loop (presumably returns once the window is closed - TODO confirm),
# then persist the editor state and drop the window reference.
g.run()
active_editor.save_editor_state()
w = None
80 | 


--------------------------------------------------------------------------------
/grr/camera.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from . import vec
  3 | from . import transform as t
  4 | 
  5 | #synchronize value with INVERTED_DEPTH in depth_utils.hlsl
  6 | g_InvertedDepth = True
  7 | 
  8 | class Camera:
  9 | 
 10 |     s_DirtyProj = 1 << 0
 11 | 
 12 |     def __init__(self, w, h):
 13 |         self.m_fov = 20 * t.to_radians()
 14 |         self.m_w = w
 15 |         self.m_h = h
 16 |         self.m_near = 0.1
 17 |         self.m_far = 10000
 18 |         self.m_focus_distance = 1.0
 19 |         self.m_transform = t.Transform()
 20 |         self.m_transform.scale = [-1,1,-1]
 21 |         self.m_proj_matrix = t.Transform.Identity()
 22 |         self.m_proj_inv_matrix = t.Transform.Identity()
 23 |         self.m_dirty_flags = Camera.s_DirtyProj
 24 |         self.update_mats()
 25 |         return
 26 | 
 27 |     @property
 28 |     def transform(self):
 29 |         return self.m_transform
 30 | 
 31 |     @property
 32 |     def pos(self):
 33 |         return self.m_transform.translation
 34 | 
 35 |     @property
 36 |     def focus_distance(self):
 37 |         return self.m_focus_distance
 38 | 
 39 |     @property
 40 |     def focus_point(self):
 41 |         return self.pos + self.m_focus_distance * self.m_transform.front
 42 | 
 43 |     @property
 44 |     def fov(self):
 45 |         return self.m_fov
 46 | 
 47 |     @property
 48 |     def w(self):
 49 |         return self.m_w
 50 | 
 51 |     @property
 52 |     def h(self):
 53 |         return self.m_h
 54 | 
 55 |     @property
 56 |     def near(self):
 57 |         return self.m_near
 58 | 
 59 |     @property
 60 |     def far(self):
 61 |         return self.m_far
 62 | 
 63 |     @property
 64 |     def proj_matrix(self):
 65 |         self.update_mats()
 66 |         return self.m_proj_matrix
 67 | 
 68 |     @property
 69 |     def proj_inv_matrix(self):
 70 |         self.update_mats()
 71 |         return self.m_proj_inv_matrix
 72 | 
 73 |     @property
 74 |     def view_matrix(self):
 75 |         return self.m_transform.transform_inv_matrix
 76 | 
 77 |     @fov.setter
 78 |     def fov(self, value):
 79 |         self.m_dirty_flags = Camera.s_DirtyProj
 80 |         self.m_fov = value
 81 | 
 82 |     @pos.setter
 83 |     def pos(self, value):
 84 |         self.m_transform.translation = value
 85 | 
 86 |     @pos.setter
 87 |     def rotation(self, value):
 88 |         self.m_transform.rotation = value
 89 | 
 90 |     @w.setter
 91 |     def w(self, value):
 92 |         self.m_dirty_flags = Camera.s_DirtyProj
 93 |         self.m_w = value
 94 | 
 95 |     @h.setter
 96 |     def h(self, value):
 97 |         self.m_dirty_flags = Camera.s_DirtyProj
 98 |         self.m_h = value
 99 | 
100 |     @near.setter
101 |     def near(self, value):
102 |         self.m_dirty_flags = Camera.s_DirtyProj
103 |         self.m_near = value
104 | 
105 |     @far.setter
106 |     def far(self, value):
107 |         self.m_dirty_flags = Camera.s_DirtyProj
108 |         self.m_far = value
109 | 
110 |     @focus_distance.setter
111 |     def focus_distance(self, value):
112 |         self.m_focus_distance = value
113 | 
114 |     def update_mats(self):
115 |         if ((self.m_dirty_flags & Camera.s_DirtyProj) != 0):
116 |             (n, f) = (self.m_near, self.m_far)
117 |             if g_InvertedDepth:
118 |                 (f, n) = (self.m_near, self.m_far)
119 |             self.m_proj_matrix = t.projection_matrix_from_aspect(self.m_fov, self.m_h / self.m_w, n, f)
120 |             self.m_proj_inv_matrix = np.linalg.inv(self.m_proj_matrix)
121 | 


--------------------------------------------------------------------------------
/grr/clear_target_cs.hlsl:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | RWTexture2D<float4> g_output : register(u0);
 4 | cbuffer Constants : register(b0)
 5 | {
 6 |     float4 clearColor;
 7 | }
 8 | 
 9 | [numthreads(8,8,1)]
10 | void csMainClear(int2 dti : SV_DispatchThreadID)
11 | {
12 |     g_output[dti] = clearColor;
13 | }
14 | 
15 | cbuffer ConstantsUintBuff : register(b0)
16 | {
17 |     uint g_uintClearVal;
18 |     int g_clearOffset;
19 |     int g_clearValSize;
20 | }
21 | 
22 | RWBuffer<uint> g_output_buff_uint : register(u0);
23 | [numthreads(64,1,1)]
24 | void csMainClearUintBuffer(int3 dti : SV_DispatchThreadID)
25 | {
26 |     if (dti.x >= g_clearValSize)
27 |         return;
28 | 
29 |     g_output_buff_uint[g_clearOffset + dti.x] = g_uintClearVal;
30 | }
31 | 


--------------------------------------------------------------------------------
/grr/coverage.hlsl:
--------------------------------------------------------------------------------
  1 | /*
  2 | MIT License
  3 | 
  4 | Copyright (c) 2022 Kleber Garcia
  5 | 
  6 | Permission is hereby granted, free of charge, to any person obtaining a copy
  7 | of this software and associated documentation files (the "Software"), to deal
  8 | in the Software without restriction, including without limitation the rights
  9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 10 | copies of the Software, and to permit persons to whom the Software is
 11 | furnished to do so, subject to the following conditions:
 12 | 
 13 | The above copyright notice and this permission notice shall be included in all
 14 | copies or substantial portions of the Software.
 15 | 
 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 22 | SOFTWARE.
 23 | */
 24 | 
 25 | #ifndef __COVERAGE__
 26 | #define __COVERAGE__
 27 | 
 28 | //Utilities for coverage bit mask on an 8x8 grid.
 29 | namespace coverage
 30 | {
 31 | 
 32 | //**************************************************************************************************************/
 33 | //                                           How to use
 34 | //**************************************************************************************************************/
 35 | /*
 36 | To utilize this library, first call the genLUT function at the beginning of your compute shader.
 37 | This function must be followed by a group sync. Example follows:
 38 | 
 39 | ...
 40 | coverage::genLUT(groupThreadIndex);
 41 | GroupMemoryBarrierWithGroupSync();
 42 | ...
 43 | 
 44 | Alternatively, you can dump the contents into a buffer. The contents of the LUT are inside gs_quadMask, which is 8 uint entries (each packing two 16-bit 4x4 quad masks).
 45 | 
 46 | After this use the coverage functions. For example:
 47 | 
 48 | uint2 lineCoverage = coverage::lineCoverageMask(float2(0.0, 0.0), float2(0.5, 0.5), 0.2, 0.2);
 49 | 
 50 | This line will hold a 8x8 mask of coverage for such line.
 51 | 
 52 | 
 53 | */
 54 | 
 55 | //**************************************************************************************************************/
 56 | //                                        Coordinate System 
 57 | //**************************************************************************************************************/
 58 | /*
 59 | The functions in this library follow the same convention: the input is a shape described by certain vertices,
 60 | output is a 64 bit mask with such shape's coverage.
 61 | 
 62 | The coordinate system is (0,0) for the top left of an 8x8 grid, and (1,1) for the bottom right.
 63 | The LSB represents coordinate (0,0), and sample points are centered on the pixel.
 64 | 
 65 | (0.0,0.0)                       (1.0,0.0)
 66 |     |                               |
 67 |     |_______________________________|
 68 |     |   |   |   |   |   |   |   |   |
 69 |     | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
 70 |     |___|___|___|___|___|___|___|___|
 71 |     |   |   |   |   |   |   |   |   |
 72 |     | 8 | 9 | 10| 11| 12| 13| 14| 15|
 73 |     |___|___|___|___|___|___|___|___|___(1.0, 2.0/8.0)
 74 | 
 75 |  the center of bit 0 would be (0.5/8.0, 0.5/8.0) and so on
 76 | 
 77 | any points outside of the range (0,1) means they are outside the grid.
 78 | */
 79 | 
 80 | //**************************************************************************************************************/
 81 | //                                           coverage API
 82 | //**************************************************************************************************************/
 83 | 
 84 | /*
 85 | Call this function to generate the coverage 4x4 luts
 86 | groupThreadIndex - the thread index.
 87 | NOTE: must sync group threads after calling this. 
 88 | */
 89 | void genLUT(uint groupThreadIndex);
 90 | 
 91 | /*
 92 | Call this function to get a 64 bit coverage mask for a triangle.
 93 | v0, v1, v2 - the triangle coordinates in right hand ruling order
 94 | return - the coverage mask for this triangle
 95 | */
 96 | uint2 triangleCoverageMask(float2 v0, float2 v1, float2 v2, bool showFrontFace, bool showBackface, bool isConservative = false);
 97 | 
 98 | 
 99 | /*
100 | Call this function to get a 64 bit coverage mask for a line.
101 | v0, v1 - the line coordinates.
102 | thickness - thickness of line in normalized space. 1.0 means the entire 8 pixels in a tile
103 | caps - extra pixels in the caps of the line in normalized space. 1.0 means 8 pixels in a tile
104 | return - the coverage mask of this line
105 | */
106 | uint2 lineCoverageMask(float2 v0, float2 v1, float thickness, float caps);
107 | 
108 | 
109 | //**************************************************************************************************************/
110 | //                                       coverage implementation 
111 | //**************************************************************************************************************/
112 | 
/*
Builds the compact (16 bit) 4x4 coverage quad for a line segment.
The line is assumed to have a positive slope <= 1.0, so it can rise at most one row per column.
"incrementMask" has one bit per column: a set bit means the line rises by one row at that column.
Only the low 4 bits are meaningful.

Rows are packed 4 bits each, row 0 in the lowest nibble. Drawn with the LSB on the left and
row 0 at the bottom, the increment mask
1 0 1 0
produces this quad:

0 0 0 0
0 0 0 1 <- 3rd bit raises the line here
0 1 1 1 <- first bit raises the line here
1 1 1 1 <- the lowest row is always fully covered
*/
uint buildQuadMask(uint incrementMask)
{
    uint quad = 0;
    uint rowBits = 0xF;

    int row = 0;
    while (row < 4)
    {
        // Emit the current row into its nibble.
        quad |= rowBits << (row * 4);

        // No more raises: every row above stays empty.
        if (incrementMask == 0)
            break;

        // The next row is covered only to the right of the lowest pending raise.
        int raiseColumn = firstbitlow(incrementMask);
        rowBits = (0xFu << (raiseColumn + 1)) & 0xFu;

        // Consume that raise (the bit is known to be set, so clearing equals toggling).
        incrementMask &= ~(1u << raiseColumn);
        ++row;
    }

    return quad;
}
144 | 
145 | /*
146 | lut for 4x4 quad mask. See buildQuadMask function, packed in 16 bits.
147 | 4 states for horizontal flipping and vertical flipping
148 | You can dump this lut to a buffer, and preload it manually,
149 | or just regenerated in your thread group
150 | */
151 | groupshared uint gs_quadMask[8]; 
152 | 
// Fills gs_quadMask with the quad masks of all 16 possible 4-bit increment masks.
// Each of the first 8 threads writes one uint holding two 16-bit quads:
// the even increment mask in the low half, the odd one in the high half.
// Callers must sync the group before sampling the LUT.
void genLUT(uint groupThreadIndex)
{
    // Only 8 LUT slots exist; the remaining threads have nothing to do.
    if (groupThreadIndex >= 8u)
        return;

    const uint evenIncrementMask = groupThreadIndex << 1;
    const uint lowHalf = buildQuadMask(evenIncrementMask);
    const uint highHalf = buildQuadMask(evenIncrementMask | 1);
    gs_quadMask[groupThreadIndex] = (highHalf << 16) | lowHalf;
}
163 | 
// Fetches the 4x4 quad for a 4-bit increment mask ("lookup") from gs_quadMask and
// spreads its four 4-bit rows so that each row lands in the low nibble of its own byte.
uint sampleLUT(uint lookup)
{
    // Two 16-bit quads live in each LUT entry; bit 0 of the lookup picks the half.
    const uint packedPair = gs_quadMask[lookup >> 1];
    const uint halfShift = 16 * (lookup & 0x1);
    const uint quad = (packedPair >> halfShift) & 0xFFFF;

    const uint row0 = quad & 0xF;
    const uint row1 = (quad & 0xF0) << 4;
    const uint row2 = (quad & 0xF00) << 8;
    const uint row3 = (quad & 0xF000) << 12;
    return row0 | row1 | row2 | row3;
}
169 | 
// Transposes an 8x8 coverage mask (x holds rows 0-3, y holds rows 4-7, one byte per row)
// in three passes: inside 2x2 cells, then 2x2 cells inside 4x4 cells, then the 4x4 quadrants.
uint2 transposeCoverageMask(uint2 mask)
{
    // Pass 1: swap the off-diagonal bits of every 2x2 cell.
    const uint2 cellUp = (mask & 0x00aa00aa) << 7;
    const uint2 cellDown = (mask & 0x55005500) >> 7;
    const uint2 cellKeep = mask & 0xaa55aa55;
    mask = cellUp | cellDown | cellKeep;

    // Pass 2: swap the off-diagonal 2x2 cells of every 4x4 cell.
    const uint2 blockUp = (mask & 0x0000cccc) << 14;
    const uint2 blockDown = (mask & 0x33330000) >> 14;
    const uint2 blockKeep = mask & 0xcccc3333;
    mask = blockUp | blockDown | blockKeep;

    // Pass 3: swap the off-diagonal 4x4 quadrants, which live in different components.
    const uint2 crossed = uint2((mask.y & 0x0f0f0f0f) << 4, (mask.x & 0xf0f0f0f0) >> 4);
    const uint2 kept = mask & uint2(0x0f0f0f0f, 0xf0f0f0f0);
    return crossed | kept;
}
182 | 
// Mirrors an 8x8 coverage mask horizontally by reversing the bit order inside every byte (row).
uint2 mirrorXCoverageMask(uint2 mask)
{
    // Swap neighbouring bits.
    const uint2 evenBits = mask & 0x55555555;
    const uint2 oddBits = mask & 0xaaaaaaaa;
    mask = (evenBits << 1) | (oddBits >> 1);

    // Swap neighbouring bit pairs.
    const uint2 lowPairs = mask & 0x33333333;
    const uint2 highPairs = mask & 0xcccccccc;
    mask = (highPairs >> 2) | (lowPairs << 2);

    // Swap the two nibbles of each byte.
    const uint2 lowNibbles = mask & 0x0f0f0f0f;
    const uint2 highNibbles = mask & 0xf0f0f0f0;
    return (highNibbles >> 4) | (lowNibbles << 4);
}
195 | 
// Mirrors an 8x8 coverage mask vertically by reversing the order of its 8 row bytes.
uint2 mirrorYCoverageMask(uint2 mask)
{
    // Swap the two 4-row halves (the x and y components).
    mask = mask.yx;

    // Swap the 16-bit halves of each component (pairs of rows).
    mask = (mask << 16) | (mask >> 16);

    // Swap the two bytes inside each 16-bit half (single rows).
    const uint2 evenRows = mask & 0x00ff00ff;
    const uint2 oddRows = mask & 0xff00ff00;
    return (evenRows << 8) | (oddRows >> 8);
}
207 | 
208 | #define COVERAGE_LINE_FLAGS_TRANSPOSE (1 << 0)
209 | #define COVERAGE_LINE_FLAGS_X_FLIP (1 << 1)
210 | #define COVERAGE_LINE_FLAGS_Y_FLIP (1 << 2)
211 | #define COVERAGE_LINE_FLAGS_VALID (1 << 3)
212 | 
// Analytical 2D line f(x) = a * x + b.
struct Line
{
    float a; // slope
    float b; // offset (value of f at x = 0)

    // Returns f(xval).
    float eval(float xval)
    {
        return a * xval + b;
    }

    // Returns f evaluated at four x values at once.
    float4 eval4(float4 xvals)
    {
        return a * xvals + b;
    }

    // Returns the point (xv, f(xv)) on the line.
    float2 pointAt(float xv)
    {
        float2 p;
        p.x = xv;
        p.y = eval(xv);
        return p;
    }
};
238 | 
239 | 
// Builds the analytical line f(x) = a * x + b passing through v0 and v1.
// The slope is (v1.y - v0.y) / (v1.x - v0.x); no special handling is done when v0.x == v1.x.
Line buildLine(float2 v0, float2 v1)
{
    const float2 delta = v1 - v0;

    Line result;
    result.a = delta.y / delta.x;
    // Solve b from f(v1.x) = v1.y.
    result.b = v1.y - result.a * v1.x;
    return result;
}
251 | 
252 | 
// Builds a "positive" line from two points.
// The endpoints are mirrored and/or transposed so that the resulting line runs left to right
// with a slope in the range [0, 1]. The operations applied are reported in "flags"
// (COVERAGE_LINE_FLAGS_*), which lets the caller map results back to the original orientation.
// COVERAGE_LINE_FLAGS_VALID is set only when the two points differ.
Line buildPositiveLine(float2 v0, float2 v1, out uint flags)
{
    flags = 0u;

    // Mirror horizontally so that the line goes from left to right.
    if (v0.x > v1.x)
    {
        flags |= COVERAGE_LINE_FLAGS_X_FLIP;
        v0.x = 1.0 - v0.x;
        v1.x = 1.0 - v1.x;
    }

    // Mirror vertically so that the line goes upwards.
    if (v0.y > v1.y)
    {
        flags |= COVERAGE_LINE_FLAGS_Y_FLIP;
        v0.y = 1.0 - v0.y;
        v1.y = 1.0 - v1.y;
    }

    // Steeper than 45 degrees: swap the axes so the slope does not exceed 1.
    float2 delta = v1 - v0;
    if (abs(delta.y) > abs(delta.x))
    {
        flags |= COVERAGE_LINE_FLAGS_TRANSPOSE;
        delta = delta.yx;
        v0 = v0.yx;
        v1 = v1.yx;
    }

    // A degenerate (zero length) line is built anyway but is not marked valid.
    if (any(v1 != v0))
        flags |= COVERAGE_LINE_FLAGS_VALID;

    Line positive;
    positive.a = delta.y / delta.x;
    positive.b = v1.y - positive.a * v1.x;
    return positive;
}
292 | 
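293 | // Packing of LineArea::coverageData
294 | // [bits] | [data]
295 | //  0-3   | left_mask
296 | //  4-7   | right_mask
297 | //  8-11  | flags (COVERAGE_LINE_FLAGS_*)
298 | // The left / right offsets are not packed here,
299 | // they are stored separately in LineArea::offsets.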
300 | #define COVERAGE_DATA_BIT_MASK ((1u << 4) - 1u)
301 | #define COVERAGE_LEFT_MASK_BIT_SHIFT 0
302 | #define COVERAGE_RIGHT_MASK_BIT_SHIFT 4
303 | #define COVERAGE_FLAGS_OFFSET_BIT_SHIFT 8
304 | 
305 | /*
306 | Represents a set of bits in an 8x8 grid divided by a line.
307 | The representation is given by 2 splits of the 8x8 grid.
308 | offsets represents how much we offset the quadCoverage on either x or y (flipped dependant axis)
309 | the mask represents the increment mask used to look up the quadCoverage
310 | */
struct LineArea
{
    // Packed bits written by buildLineArea: left increment mask in bits 0-3,
    // right increment mask in bits 4-7, line flags starting at bit 8.
    uint coverageData;
    // x: row index of the line at the first column; y: x plus the number of raises in the left half.
    int2 offsets;
    // Line built from the original (unflipped) endpoints; kept for debugging.
    Line debugLine;
} ;
317 | 
// Creates a line area object, based on 2 points on an 8x8 quad
// quad coordinate domain is 0.0 -> 1.0 for both axis.
// Anything negative or greater than 1.0 is by definition outside of the 8x8 quad.
// The result stores the line flags, two 4-bit increment masks (left and right half
// of the grid) and the row offsets at which each half starts.
LineArea buildLineArea(float2 v0, float2 v1)
{
    LineArea data;

    //line debug data
    data.debugLine = buildLine(v0, v1);

    // Canonical "positive" version of the line; flags records the flips / transpose applied.
    uint flags;
    Line l = buildPositiveLine(v0, v1, flags);
    data.coverageData = (flags & COVERAGE_DATA_BIT_MASK) << COVERAGE_FLAGS_OFFSET_BIT_SHIFT;

    // Xs values of 8 points
    // (the horizontal centers of the 8 columns, in normalized coordinates)
    const float4 xs0 = float4(0.5,1.5,2.5,3.5)/8.0;
    const float4 xs1 = float4(4.5,5.5,6.5,7.5)/8.0;

    // Ys values of 8 points
    float4 ys0 = l.eval4(xs0);
    float4 ys1 = l.eval4(xs1);

    // Integer row index of the line at each column, clamped to [-1, 8]
    // (one row outside the grid on either side).
    int4 ysi0 = clamp((int4)floor(ys0 * 8.0 - 0.5), -1,8);
    int4 ysi1 = clamp((int4)floor(ys1 * 8.0 - 0.5), -1,8);

    // Incremental masks
    // Per-column difference between the row index of the next column and this one.
    // The last lane of the right half has no next column, so it is always 0.
    uint4 dysmask0 = uint4(ysi0.yzw, ysi1.x) - ysi0.xyzw;
    uint4 dysmask1 = uint4(ysi1.yzw, 0) - uint4(ysi1.xyz, 0);


    // Final output, offset and mask
    // Each lane's difference becomes one bit of the half's increment mask.
    uint mask0 = dysmask0.x | (dysmask0.y << 1) | (dysmask0.z << 2) | (dysmask0.w << 3);
    data.coverageData |= (mask0 & COVERAGE_DATA_BIT_MASK) << COVERAGE_LEFT_MASK_BIT_SHIFT;
    uint mask1 = dysmask1.x | (dysmask1.y << 1) | (dysmask1.z << 2) | (dysmask1.w << 3);
    data.coverageData |= (mask1 & COVERAGE_DATA_BIT_MASK) << COVERAGE_RIGHT_MASK_BIT_SHIFT;
    // Left half starts at the first column's row; the right half starts that many
    // rows higher as there are raises in the left half.
    data.offsets = int2(ysi0.x, countbits(mask0) + ysi0.x);

    return data;
}
357 | 
// Expands a LineArea into a 64-bit (uint2) coverage mask for the 8x8 grid.
// Requires gs_quadMask to have been filled (see genLUT), since it samples the LUT.
// Returns 0 when the line is not flagged as valid.
uint2 createCoverageMask(in LineArea lineArea)
{
    // Low nibble of every row byte = columns 0-3; its complement = columns 4-7.
    const uint leftSideMask = 0x0F0F0F0F;
    const uint2 horizontalMask = uint2(leftSideMask, ~leftSideMask);
    int2 offsets = lineArea.offsets;

    // 4x4 quads for the left and right increment masks, one row per byte.
    uint2 halfSamples = uint2(
        sampleLUT((lineArea.coverageData >> COVERAGE_LEFT_MASK_BIT_SHIFT) & COVERAGE_DATA_BIT_MASK),
        sampleLUT((lineArea.coverageData >> COVERAGE_RIGHT_MASK_BIT_SHIFT) & COVERAGE_DATA_BIT_MASK));
    
    // Move the right quad into the high nibble of each row byte (columns 4-7).
    uint2 sideMasks = uint2(halfSamples.x, (halfSamples.y) << 4);

    // 4 quadrands (top left, top right, bottom left, bottom right)
    // Row offsets converted to bit shifts (8 bits per row); the second pair is relative
    // to row 4 because it goes into the second uint. Clamped so the shift amount stays within 31.
    int4 quadrantOffsets = clamp((offsets.xyxy - int4(0,0,4,4)) << 3, -31, 31);

    uint flags = (lineArea.coverageData >> COVERAGE_FLAGS_OFFSET_BIT_SHIFT) & COVERAGE_DATA_BIT_MASK;
    // Positive offset: shift the inverted quad up; otherwise shift the quad down and invert.
    // Either way the result is limited to the columns of its own half.
    uint4 halfMasks = select(quadrantOffsets > 0, (~sideMasks.xyxy & horizontalMask.xyxy) << quadrantOffsets, ~(sideMasks.xyxy >> -quadrantOffsets)) & horizontalMask.xyxy;
    // Merge left and right halves: x = rows 0-3, y = rows 4-7.
    uint2 coverageMask = uint2(halfMasks.x | halfMasks.y, halfMasks.z | halfMasks.w);
    // Undo the transformations recorded in the flags; each step also inverts the mask.
    coverageMask = (flags & COVERAGE_LINE_FLAGS_TRANSPOSE) ? ~transposeCoverageMask(coverageMask) : coverageMask;
    coverageMask = (flags & COVERAGE_LINE_FLAGS_X_FLIP) ? ~mirrorXCoverageMask(coverageMask) : coverageMask;
    coverageMask = (flags & COVERAGE_LINE_FLAGS_Y_FLIP) ? ~mirrorYCoverageMask(coverageMask) : coverageMask;
    return (flags & COVERAGE_LINE_FLAGS_VALID) ? ~coverageMask : 0u;
}
381 | 
// Computes the 64-bit coverage mask of the triangle (v0, v1, v2).
//
//   showFrontFace  - include the intersection of the three edge masks.
//   showBackface   - include the intersection of the three inverted edge masks,
//                    but only if at least one edge produced a non-empty mask.
//   isConservative - when true, grow the mask by one cell left, right, up and down.
//                    Defaults to false so callers that predate this parameter
//                    (five-argument calls) keep compiling.
//
// Returns uint2 (x = low 32 bits, y = high 32 bits), 8 bits per row.
uint2 triangleCoverageMask(float2 v0, float2 v1, float2 v2, bool showFrontFace, bool showBackface, bool isConservative = false)
{
    uint2 mask0 = coverage::createCoverageMask(coverage::buildLineArea(v0, v1));
    uint2 mask1 = coverage::createCoverageMask(coverage::buildLineArea(v1, v2));
    uint2 mask2 = coverage::createCoverageMask(coverage::buildLineArea(v2, v0));

    uint2 frontMask = mask0 & mask1 & mask2;
    uint2 backMask = ~mask0 & ~mask1 & ~mask2;

    // If every edge mask is empty the inverted masks would cover the whole tile,
    // so the back face is only emitted when some edge is non-empty.
    bool anyEdgeMaskSet = any(mask0 != 0) || any(mask1 != 0) || any(mask2 != 0);

    uint2 triangleMask = (showFrontFace * frontMask) | ((anyEdgeMaskSet && showBackface) * backMask);

    if (isConservative)
    {
        // Horizontal growth; the byte-edge masks stop bits from leaking into the neighbouring row.
        triangleMask |= (triangleMask >> 1) & ~0x80808080u; //left
        triangleMask |= (triangleMask << 1) & ~0x01010101u; //right

        //top: shift every row down by 8 bits, carrying the lowest row of .y into .x
        triangleMask.x |= (triangleMask.y << 24) | (triangleMask.x >> 8);
        triangleMask.y |= triangleMask.y >> 8;

        //bottom: shift every row up by 8 bits, carrying the highest row of .x into .y
        triangleMask.y |= (triangleMask.x >> 24) | (triangleMask.y << 8);
        triangleMask.x |= triangleMask.x << 8;
    }

    return triangleMask;
}
407 | 
// Computes the 64-bit coverage mask of a thick line segment from v0 to v1.
// The segment is extended by `caps` at both ends and offset by `thickness`
// to either side, and the resulting quad is rasterized as four edges.
uint2 lineCoverageMask(float2 v0, float2 v1, float thickness, float caps)
{
    float2 direction = normalize(v1 - v0);

    // Perpendicular to the line direction, scaled by the thickness.
    float2 sideOffset = cross(float3(direction, 0.0), float3(0, 0, 1)).xy * thickness;

    // Extend both end points along the line.
    v0 -= caps * direction;
    v1 += caps * direction;

    // The four corners of the quad.
    float2 startLow  = v0 - sideOffset;
    float2 startHigh = v0 + sideOffset;
    float2 endLow    = v1 - sideOffset;
    float2 endHigh   = v1 + sideOffset;

    uint2 lowEdge   = coverage::createCoverageMask(coverage::buildLineArea(startLow, endLow));
    uint2 highEdge  = coverage::createCoverageMask(coverage::buildLineArea(endHigh, startHigh));
    uint2 startEdge = coverage::createCoverageMask(coverage::buildLineArea(startHigh, startLow));
    uint2 endEdge   = coverage::createCoverageMask(coverage::buildLineArea(endLow, endHigh));

    // A cell is covered only if it passes all four edges.
    return lowEdge & highEdge & endEdge & startEdge;
}
421 | 
422 | }
423 | 
424 | #endif
425 | 
426 | 


--------------------------------------------------------------------------------
/grr/coverage_lut_tool.hlsl:
--------------------------------------------------------------------------------
  1 | #include "debug_font.hlsl"
  2 | #include "coverage.hlsl"
  3 | 
  4 | // Flags must match coverage_lut_tool.py
  5 | #define SHOW_TRIANGLE (1 << 0)
  6 | #define SHOW_TRIANGLE_BACKFACE (1 << 1)
  7 | #define SHOW_TRIANGLE_FRONTFACE (1 << 2)
  8 | #define SHOW_LINE (1 << 3)
  9 | 
 10 | cbuffer Constants : register(b0)
 11 | {
 12 |     float4 g_size; //w,h,1/w,1/h
 13 |     float4 g_packedV0;
 14 |     float4 g_packedV1;
 15 |     float4 g_packedV2;
 16 |     float4 g_lineArgs;
 17 |     uint4 g_miscArgs;
 18 | }
 19 | 
 20 | struct InputVertices
 21 | {
 22 |     float2 v0;
 23 |     float2 v1;
 24 |     float2 v2;
 25 |     float2 v3;
 26 |     float2 v4;
 27 | 
 28 |     void load()
 29 |     {
 30 |         float2 aspect = float2(g_size.x * g_size.w, 1.0);
 31 |         v0 = g_packedV0.xy * aspect;    
 32 |         v1 = g_packedV0.zw * aspect;    
 33 |         v2 = g_packedV1.xy * aspect;    
 34 |         v3 = g_packedV1.zw * aspect;    
 35 |         v4 = g_packedV2.xy * aspect;    
 36 |     }
 37 | };
 38 | 
 39 | SamplerState g_fontSampler : register(s0);
 40 | Texture2D<float4> g_fontTexture : register(t0);
 41 | RWTexture2D<float4> g_output : register(u0);
 42 | 
 43 | float2 getGridUV(float2 uv)
 44 | {
 45 |     return uv * 8.0;
 46 | }
 47 | 
 48 | float4 drawGrid(float2 uv)
 49 | {
 50 |     float2 gridUV = getGridUV(uv);
 51 |     int2 gridCoord = (int2)gridUV;
 52 |     int gridIndex = gridCoord.y * 8 + gridCoord.x;
 53 |     
 54 |     gridUV = frac(gridUV);
 55 | 
 56 |     float4 numCol = Font::drawNumber(
 57 |         g_fontTexture, g_fontSampler, gridUV * float2(2.0,4.0), 2, gridIndex);
 58 | 
 59 |     float4 col = ((gridCoord.x + (gridCoord.y & 0x1)) & 0x1) ? float4(1,1,1,0.4) : float4(0.5,0.5,0.5,0.4);
 60 | 
 61 |     col.rgb += numCol.rgb * numCol.a;
 62 | 
 63 |     return col;
 64 | }
 65 | 
 66 | float3 drawVertex(float3 col, float2 v, float2 uv)
 67 | {
 68 |     float d = distance(v, uv);
 69 |     if (d < 0.01)
 70 |         return float3(0.8,0.8,0.0);
 71 |     return col;
 72 | }
 73 | 
 74 | float3 drawLine(float3 col, float2 v0, float2 v1, float2 uv)
 75 | {
 76 |     float ldist = distance(v1, v0);
 77 |     float2 lv = v1 - v0;
 78 |     float2 ld = lv/ldist;
 79 |     float2 ruv = uv - v0;
 80 |     float t = dot(ld, ruv);
 81 |     if (t < 0.0 || t > ldist)
 82 |         return col;
 83 | 
 84 |     float2 hitPoint = t * ld + v0;
 85 |     if (distance(hitPoint, uv) < 0.005)
 86 |         return float3(0.0, 0.0, 1.0);
 87 |     return col;
 88 | }
 89 | 
 90 | float3 drawCoverageMask(float3 col, uint2 coverageMask, float2 uv)
 91 | {
 92 |     float2 gridUV = getGridUV(uv);
 93 |     int2 gridCoord = (int2)floor(gridUV);
 94 |     int gridCellId = gridCoord.y * 8 + gridCoord.x;
 95 |     uint shift = gridCellId & 0x1F;
 96 |     if ((1u << shift) & (gridCellId < 32 ? coverageMask.x : coverageMask.y))
 97 |     {
 98 |         float2 cellUv = frac(gridUV);
 99 |         float d = distance(float2(0.5,0.5), cellUv);
100 |         if (d < 0.1)
101 |             return float3(0.0,0.1,0.7);
102 |     }
103 | 
104 |     return col;
105 | }
106 | 
// Entry point of the coverage LUT tool. Each thread shades one pixel of g_output:
// the triangle / line outlines, the 8x8 board, and a dot on every covered board cell.
[numthreads(8,8,1)]
void csMain(
    uint2 dispatchThreadID : SV_DispatchThreadID,
    uint groupThreadIndex : SV_GroupIndex)
{
    // Every group builds the LUT before any thread samples it.
    coverage::genLUT(groupThreadIndex);

    GroupMemoryBarrierWithGroupSync();

    uint2 pixelCoord = dispatchThreadID.xy;
    float aspect = g_size.x * g_size.w;
    float2 screenUv = float2(pixelCoord) * g_size.zw * float2(aspect, 1.0);
    float2 boardOffset = 0.5 * aspect * float2((g_size.x - g_size.y), 0.0) * g_size.zw;
    float2 boardUv = screenUv - boardOffset;
    float3 color = float3(0,0,0);

    uint drawFlags = g_miscArgs.x;

    bool showTriangle = (drawFlags & SHOW_TRIANGLE) != 0;
    bool showLine = (drawFlags & SHOW_LINE) != 0;

    InputVertices verts;
    verts.load();
    if (showTriangle)
    {
        color = drawLine(color, verts.v0, verts.v1, screenUv);
        color = drawLine(color, verts.v1, verts.v2, screenUv);
        color = drawLine(color, verts.v2, verts.v0, screenUv);
        color = drawVertex(color, verts.v0, screenUv);
        color = drawVertex(color, verts.v1, screenUv);
        color = drawVertex(color, verts.v2, screenUv);
    }

    if (showLine)
    {
        color = drawLine(color, verts.v3, verts.v4, screenUv);
        color = drawVertex(color, verts.v3, screenUv);
        color = drawVertex(color, verts.v4, screenUv);
    }

    //make all uv coordinates relative to board
    verts.v0 -= boardOffset;
    verts.v1 -= boardOffset;
    verts.v2 -= boardOffset;
    verts.v3 -= boardOffset;
    verts.v4 -= boardOffset;

    // Coverage is only evaluated for pixels on the board; elsewhere the masks stay empty.
    uint2 triangleMask = 0;
    uint2 lineMask = 0;
    if (all(boardUv >= 0.0) && all(boardUv <= 1.0))
    {
        float4 gridCol = drawGrid(boardUv);
        color = lerp(color, gridCol.rgb, saturate(gridCol.a));

        bool showFrontFace = (drawFlags & SHOW_TRIANGLE_FRONTFACE) != 0;
        bool showBackFace = (drawFlags & SHOW_TRIANGLE_BACKFACE) != 0;

        // triangleCoverageMask takes an isConservative argument; the tool shows the
        // exact (non-conservative) coverage.
        const bool isConservative = false;
        triangleMask = showTriangle
            ? coverage::triangleCoverageMask(verts.v0, verts.v1, verts.v2, showFrontFace, showBackFace, isConservative)
            : 0;

        float lineThickness = g_lineArgs.x;
        float lineCap = g_lineArgs.y;
        lineMask = showLine ? coverage::lineCoverageMask(verts.v3, verts.v4, lineThickness, lineCap) : 0;
    }

    color = drawCoverageMask(color, triangleMask | lineMask, boardUv);
    g_output[pixelCoord] = float4(color, 1.0);
}
175 | 


--------------------------------------------------------------------------------
/grr/coverage_lut_tool.py:
--------------------------------------------------------------------------------
  1 | import coalpy.gpu as g
  2 | from . import debug_font
  3 | import math
  4 | 
  5 | # Flags must match coverage_lut_tool.hlsl
  6 | class CoverageImageFlags:
  7 |     ShowTriangle  = 1 << 0
  8 |     ShowTriangleBackface  = 1 << 1
  9 |     ShowTriangleFrontface = 1 << 2
 10 |     ShowLine      = 1 << 3
 11 |     
 12 | g_coverage_lut_tool_shader = g.Shader(file="coverage_lut_tool.hlsl", name="coverage_lut_tool", main_function = "csMain")
 13 | 
 14 | class CoverageLUTTool:
 15 |     def __init__(self):
 16 |         self.m_active = False
 17 |         self.m_texture = None
 18 |         self.m_tex_width = 0
 19 |         self.m_tex_height = 0
 20 |         self.m_show_triangle = True
 21 |         self.m_show_triangle_backface = True
 22 |         self.m_show_triangle_frontface = True
 23 |         self.m_show_line = True
 24 |         self.m_v0x = 0.2
 25 |         self.m_v0y = 0.2
 26 |         self.m_v1x = 0.5
 27 |         self.m_v1y = 0.9
 28 |         self.m_v2x = 0.9
 29 |         self.m_v2y = 0.2
 30 |         self.m_v3x = 0.2
 31 |         self.m_v3y = 0.5
 32 |         self.m_v4x = 0.8 
 33 |         self.m_v4y = 0.5
 34 |         self.m_line_thickness = 0.18
 35 |         self.m_line_cap = 0.0
 36 |         self.m_is_focused = False
 37 |         return
 38 | 
 39 |     @property
 40 |     def active(self):
 41 |         return self.m_active
 42 | 
 43 |     @active.setter
 44 |     def active(self, value):
 45 |         self.m_active = value
 46 | 
 47 |     @property
 48 |     def is_focused(self):
 49 |         return self.m_is_focused
 50 | 
 51 |     def render(self):
 52 |         if self.m_tex_width == 0 or self.m_tex_height == 0:
 53 |             return
 54 | 
 55 |         cmd = g.CommandList()
 56 | 
 57 |         flags =  CoverageImageFlags.ShowTriangle if self.m_show_triangle else 0
 58 |         flags |= CoverageImageFlags.ShowTriangleBackface if self.m_show_triangle_backface else 0
 59 |         flags |= CoverageImageFlags.ShowTriangleFrontface if self.m_show_triangle_frontface else 0
 60 |         flags |= CoverageImageFlags.ShowLine if self.m_show_line else 0
 61 | 
 62 |         cmd.dispatch(
 63 |             shader = g_coverage_lut_tool_shader,
 64 |             constants = [
 65 |                 float(self.m_tex_width), float(self.m_tex_height), 1.0/float(self.m_tex_width), 1.0/float(self.m_tex_height),
 66 |                 float(self.m_v0x), float(self.m_v0y), float(self.m_v1x), float(self.m_v1y),
 67 |                 float(self.m_v2x), float(self.m_v2y), float(self.m_v3x), float(self.m_v3y),
 68 |                 float(self.m_v4x), float(self.m_v4y), float(0.0), float(0.0),
 69 |                 float(self.m_line_thickness), float(self.m_line_cap), float(0.0), float(0.0),
 70 |                 int(flags), 0, 0, 0],
 71 |             
 72 |             samplers = [debug_font.font_sampler],
 73 |             inputs = [debug_font.font_texture],
 74 |             outputs = self.m_texture,
 75 | 
 76 |             x = math.ceil(self.m_tex_width/8),
 77 |             y = math.ceil(self.m_tex_height/8),
 78 |             z = 1
 79 |         )
 80 | 
 81 |         g.schedule(cmd)
 82 |         return
 83 | 
 84 |     def build_ui_properties(self, imgui : g.ImguiBuilder):
 85 |         if (imgui.collapsing_header("Coverage lut props", g.ImGuiTreeNodeFlags.DefaultOpen)):
 86 |             self.m_show_triangle = imgui.checkbox("show_triangle", self.m_show_triangle)
 87 |             self.m_show_line = imgui.checkbox("show_line", self.m_show_line)
 88 | 
 89 |         if (self.m_show_triangle and imgui.collapsing_header("Coverage lut tool triangle", g.ImGuiTreeNodeFlags.DefaultOpen)):
 90 |             self.m_show_triangle_backface  = imgui.checkbox("show_triangle_backface", self.m_show_triangle_backface  )
 91 |             self.m_show_triangle_frontface = imgui.checkbox("show_triangle_frontface", self.m_show_triangle_frontface)
 92 |             self.m_v0x = imgui.slider_float(label="tri_v0x", v=self.m_v0x, v_min=0.0, v_max=1.0)
 93 |             self.m_v0y = imgui.slider_float(label="tri_v0y", v=self.m_v0y, v_min=0.0, v_max=1.0)
 94 |             self.m_v1x = imgui.slider_float(label="tri_v1x", v=self.m_v1x, v_min=0.0, v_max=1.0)
 95 |             self.m_v1y = imgui.slider_float(label="tri_v1y", v=self.m_v1y, v_min=0.0, v_max=1.0)
 96 |             self.m_v2x = imgui.slider_float(label="tri_v2x", v=self.m_v2x, v_min=0.0, v_max=1.0)
 97 |             self.m_v2y = imgui.slider_float(label="tri_v2y", v=self.m_v2y, v_min=0.0, v_max=1.0)
 98 | 
 99 |         if (self.m_show_line and imgui.collapsing_header("Coverage lut tool line", g.ImGuiTreeNodeFlags.DefaultOpen)):
100 |             self.m_v3x = imgui.slider_float(label="line_v0x", v=self.m_v3x, v_min=0.0, v_max=1.0)
101 |             self.m_v3y = imgui.slider_float(label="line_v0y", v=self.m_v3y, v_min=0.0, v_max=1.0)
102 |             self.m_v4x = imgui.slider_float(label="line_v1x", v=self.m_v4x, v_min=0.0, v_max=1.0)
103 |             self.m_v4y = imgui.slider_float(label="line_v1y", v=self.m_v4y, v_min=0.0, v_max=1.0)
104 |             self.m_line_thickness = imgui.slider_float(label="thickness", v=self.m_line_thickness, v_min=0.0, v_max=1.0)
105 |             self.m_line_cap = imgui.slider_float(label="cap", v=self.m_line_cap, v_min=0.0, v_max=1.0)
106 | 
107 |     def build_ui(self, imgui : g.ImguiBuilder):
108 |         self.m_active = imgui.begin("Coverage LUT Tool", self.m_active)
109 |         self.m_is_focused = imgui.is_window_focused(flags = g.ImGuiFocusedFlags.RootWindow)
110 |         (cr_min_w, cr_min_h) = imgui.get_cursor_pos()
111 |         (cr_max_w, cr_max_h) = imgui.get_window_content_region_max()
112 |         (nw, nh) = (int(cr_max_w - cr_min_w), int(cr_max_h - cr_min_h))
113 | 
114 |         if nw > 0 and nh > 0 and (nw != self.m_tex_width or nh != self.m_tex_height or self.m_texture is None):
115 |             self.m_tex_width = nw
116 |             self.m_tex_height = nh
117 |             self.m_texture = g.Texture(
118 |                 name = "coverage_lut_tool_target", width = self.m_tex_width, height = self.m_tex_height,
119 |                 format = g.Format.RGBA_8_UNORM)
120 | 
121 |         if self.m_texture != None:
122 |             imgui.image(texture = self.m_texture, size = (self.m_tex_width, self.m_tex_height))
123 | 
124 |         imgui.end()
125 | 


--------------------------------------------------------------------------------
/grr/data/debug_font.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kecho/grr/bc94fc6f001074f5b692a6d06e6d18d22bae172f/grr/data/debug_font.jpg


--------------------------------------------------------------------------------
/grr/data/default-scenes/cube/cube.obj:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:f0a445327040f7ae3b7dae6ff391276a42351ce0449fedc4ee42871d3bbb3bf0
3 | size 1122
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/dragon/dragon.obj:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:aaf8d5b5196a821625f3e6b375366d61983ad66f41ab90c722f4268ced32ca3d
3 | size 74094075
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/copyright.txt:
--------------------------------------------------------------------------------
 1 | July 14, 2011 Morgan McGuire modified the model from Crytek's OBJ
 2 | export to correct some small errors.  He computed bump maps from the
 3 | normal maps using <a
 4 | href="http://cs.williams.edu/~morgan/code/">normal2bump.cpp</a> (since
 5 | MTL files expect height bumps, not normals), put the "mask" textures
 6 | into the alpha channel of the associated diffuse texture, cleaned up
 7 | noise in the masks, created the missing gi_flag.tga texture, and
 8 | removed the long untextured banner floating in the middle of the
 9 | atrium that appears in the file but in none of the published images of
10 | the model.  The banner is in banner.obj.
11 | 
12 | 
13 | 
14 | http://www.crytek.com/cryengine/cryengine3/downloads
15 | 
16 | 
17 | Sponza Model
18 | August 19, 2010
19 | The Atrium Sponza Palace, Dubrovnik, is an elegant and improved model created by Frank Meinl. The original Sponza model was created by Marko Dabrovic in early 2002. Over the years, the Sponza Atrium scene has become one of the most popular 3D scenes for testing global illumination and radiosity due to its specific architectural structure which is particularly complex for global illumination light.
20 | 
21 | However, nowadays it is considered as a simple model, thus it was decided to create a new model with highly improved appearance and scene complexity. It is donated to the public for radiosity and is represented in several different formats (3ds, Obj) for use with various commercial 3D applications and renderers.
22 | 
23 | 
24 | Screenshot from the I3D paper
25 | http://crytek.com/sites/default/files/20100301_lpv.pdf
26 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/sponza.mtl:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:7e5d765a00bf2af1c0cae1696051fdf04c5bb358cf2bc5cc2634ea8715669d9b
3 | size 6723
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/sponza.obj:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:dc9d77fa783772e92f47e67ddef8344858fa14e224711b5dd7d39f7db7042493
3 | size 21109957
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/background.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:21f5955648b003a0046ca16054aef2db07901d73662e8b8141e73ba551e06f87
3 | size 1279899
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/background_bump.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:c816df0108455787a9ffba973b01788d3cd2724dbfe1ff148dd1a51df37b2baa
3 | size 211001
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/chain_texture.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:5745cf634538333bcad860e5da4bd57d288166631aadc4b0268fc2103b152242
3 | size 374829
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/chain_texture_bump.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:2bcb283948533a96100cb90f1bffa4edb609c887b336ba21be12ec713430933e
3 | size 37014
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/chain_texture_mask.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:43c246a2de3b9bea28add7b188324765443e30223fcee13061719e2d357194e1
3 | size 1150
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/floor_gloss.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:de0f989550e4cf7e0753c7480cd2b533ecb9d61b395033fbc3dbe71140aebaf2
3 | size 1419271
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/lion.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:13c763cb8d01e781121180a2f0c7ddc6c672d0a146ffc1359a5795dba99db5c2
3 | size 1685074
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/lion2_bump.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:fcca6af06b7c7ca9b5ef0829db9ad7f2437ddaa2d6484d3811ad9a8b21a81ea3
3 | size 287706
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/lion_bump.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:8c9c6c4e42a9160f518f27af3755e8b45e0342882e422d3c6ecb397efcdc8acd
3 | size 283932
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/spnza_bricks_a_bump.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:5ce6e00a0d90f377b8f922f006b8078d72272259b1816e24d511302c0822dfd8
3 | size 613084
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/spnza_bricks_a_diff.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:6653d814e78df5b1d72bff0bc4f055b19f40c60125368c66f706862a77b11e19
3 | size 1850821
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/spnza_bricks_a_spec.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:07e6c0d154316dca9f06ac8f30e155786d1b4c9f4f75a5f57901f96338d452f7
3 | size 789292
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_arch_bump.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:7ed13ea591ff9d0c4e950a372e59de8e95dd58af9d0f1fcc5794b87927e8d9ab
3 | size 72484
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_arch_diff.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:1a2e127ce784d2d4f061eae58a780cdcc8daba6c0f254d06e5494e41d4874ff2
3 | size 1545689
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_arch_spec.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:cff3751f78b5d8c59eb599477187c7ff2874a76140f442ed52a45e7def08e412
3 | size 519758
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_ceiling_a_diff.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:6d6ff12242bd2b0680c45b27d0234cb4fc636959a2575f29dd6eb02251348f4c
3 | size 1748205
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_ceiling_a_spec.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:2d8ae36a72774c51128a2810035377b08c7214b9fa1db6d0d4d2229162339828
3 | size 611209
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_column_a_bump.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:d88c0f98ac0d37a6c0759962bb797c3f1e9dbf976035308cde4cba92a1156b40
3 | size 314193
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_column_a_diff.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:13c3511250c790ad260845ffd5d56f4622bb3760316831403c3cc182bcf40095
3 | size 1743092
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_column_a_spec.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:66f596b9e37fdaf84b581bb671c40942d59f0dda3470f743b8cbe7928cbc4cab
3 | size 624131
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_column_b_bump.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:a5f818c01f960fc43258e5be57c77b717209078a50758d43c6c61b89253c0395
3 | size 306062
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_column_b_diff.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:e893c9f0a8ca1cb7b649476eaa6ce98ece4d82b447f2a9858dbf0be4ce05c8e1
3 | size 2127842
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_column_b_spec.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:6850e2de861b79d48722fd286f3726755d8d5f5f7c459e0600bc9f9f03c6f5fe
3 | size 625709
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_column_c_bump.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:25badc8f4f8ecb7f982265534d60fd0f0da961ee4f64abfafa8b31ec07534e27
3 | size 322713
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_column_c_diff.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:eaf9050e0cb05bc5184b4fc7108085036e049ca6a3a93c0b11ed8e920831472d
3 | size 2021589
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_column_c_spec.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:77641f66babf57ef10d36d4f4357d39b38c510779f87a3e63c0d797c40c425b7
3 | size 662176
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_curtain_blue_diff.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:267f19cc7b95d665965f49117e6ef5cabc62ae65b5c921ca86c5f53ab5f55f97
3 | size 9202020
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_curtain_diff.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:dbe47dc0d50b3b5917d94f92c667a1068de776cf1b1af9f931358671a8e8a677
3 | size 8834304
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_curtain_green_diff.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:2a1141c908bbbcc66064ef0cd1888d26e4493560ca1a8add94ba96e4413ef0b5
3 | size 8325274
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_details_diff.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:01012e0e81c20b10856dd34476069aadf1270cedd47e13b2760ae7dec341a7ab
3 | size 1342967
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_details_spec.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:405eb35e24f254a9ff4de6f8021f847e92af5d95ad91dfd5c88c239c0bbfeec0
3 | size 618549
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_fabric_blue_diff.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:b7398d4f96ac101b92a42f8e093754fa239bb80e0038727e49574abed5473c20
3 | size 2097497
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_fabric_diff.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:1fe1ee45005ddcbcde414096c1018c9296e5dfed528ecc99f9ce7d82862fd963
3 | size 2208126
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_fabric_green_diff.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:eb009dd943c52da29e3d7dd4d30e12a912769ffc7659e97dba7a2483fa79c6c3
3 | size 2162350
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_fabric_purple.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:1de10bfe0c410f33e3a54aa4987bf7e7c209b3f00190779d1ce73001261bb294
3 | size 1104562
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_fabric_spec.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:f44c12f0749297ec5cfa73a6d867b8771cb366eb47233ff989c15f674e3573a0
3 | size 462762
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_flagpole_diff.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:c65b8651f12a05063118a91bd2c1ebb37a403936bb2b79226cf842917ebfbcbe
3 | size 1314735
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_flagpole_spec.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:e19367ea4099b185b86e9cbf932b54d2c7ec24ec4d7ed281adf8467d9be85af2
3 | size 597329
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_floor_a_diff.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:ec677f8d95abc307a7551a501ed20c9cd79bbc38d41963a7d74a35d096d2f1c4
3 | size 1881715
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_floor_a_spec.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:1a46518c046ccb196bb25bc72c89546dca8625fc278de6a34256789e84d3cfdc
3 | size 730796
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_roof_diff.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:be5c0e86671a709c5d2f87820f92414bf4c069c5041d2fb548f8bf130f7616dd
3 | size 2280888
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_thorn_bump.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:fddd9ab74a4ca2b63feba790102407aaeb1c3c675595f8836e9b97b783f7019c
3 | size 35900
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_thorn_diff.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:fbe4564dcfbba7f225f6df00caa92365cf653d6bf496bb3da59b164fa1c84aac
3 | size 450994
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_thorn_mask.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:57f195177f3ff443e43eb7590859feb4c89164107ea69d6f26ee0a1deefa5c7b
3 | size 68251
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/sponza_thorn_spec.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:1576e5befee26cebb25be28626980e34d4cfd306968772b1359a8713c7d5cebc
3 | size 360991
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/vase_bump.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:e93b7ae479ffee85ea789c75cd4d12e96c813db73d914d13d3403ca379c0eb1f
3 | size 431658
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/vase_dif.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:141d957726bb80708e3210d481c82d4038016cc31e414c541eea76d61e600bd5
3 | size 1739865
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/vase_hanging.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:310b171c5508c7f1df97cc87ebb2139141f4fbf6cd65d6cf1675e92ed64521b6
3 | size 1240906
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/vase_plant.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:2e3e7cdcc544beec19338b7e3f33e7814847b1e73294f4bae4c18d3740a7b1e1
3 | size 817124
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/vase_plant_mask.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:1d179d56d0750c9e64200bdc17b65c11ff8bda6a593f4d1453df05ade1a70927
3 | size 85746
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/vase_plant_spec.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:b2332e35c817e17fe0185a6c246ac68bd38ad73c33372b86a887362a2e485402
3 | size 716987
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/vase_round.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:5ee2138dc016aff582e50c2725b7749cdf0681537147d3cd9034e0d10414fb7c
3 | size 1798469
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/vase_round_bump.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:0bf21985bf48a5392f7faba4e043b77582512cd88866c9b1cff4ec0939c19b1f
3 | size 77501
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/sponza/textures/vase_round_spec.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:dfe9e52cae7cf1fe654ca7088916e29405bde0150812ac419f0da74467c4db0a
3 | size 1697907
4 | 


--------------------------------------------------------------------------------
/grr/data/default-scenes/teapot/teapot.obj:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:6a28e0d288d8de9ed6a20dac1d558e93f54040fb37387f97d00ae39a22280dd7
3 | size 220579
4 | 


--------------------------------------------------------------------------------
/grr/debug_font.hlsl:
--------------------------------------------------------------------------------
 1 | #ifndef _DEBUG_FONT_
 2 | #define _DEBUG_FONT_
 3 | 
 4 | #define FONT_BLOCK_SIZE 16.0
 5 | 
 6 | namespace Font
 7 | {
 8 | 
 9 | float4 drawNumber(
10 |     Texture2D<float4> fontTexture,
11 |     SamplerState fontSampler,
12 |     float2 uv,
13 |     int digitsCount,
14 |     int number)
15 | {
16 |     if (any(uv < 0.0) || any(uv > 1.0))
17 |         return float4(0,0,0,0);
18 |     int leadingZeros = 0;
19 |     int leadingZN = number;
20 |     while (leadingZN != 0)
21 |     {
22 |         ++leadingZeros;
23 |         leadingZN /= 10;
24 |     }
25 |     leadingZN = digitsCount - leadingZeros;
26 |     uv.x += (float)leadingZN/(float)digitsCount;
27 |     if (uv.x > 1.0)
28 |         return float4(0,0,0,0);
29 | 
30 |     int currDigit = clamp(digitsCount - (int)(uv.x * digitsCount) - 1.0, 0, digitsCount - 1);
31 | 
32 |     number /= pow(10, currDigit);
33 |     uv.x = fmod(uv.x * digitsCount, 1.0);
34 |     
35 |     float row = 3.0/FONT_BLOCK_SIZE;
36 |     float col = float(number % 10)/FONT_BLOCK_SIZE;
37 |     float2 samplePos = float2(col + uv.x * 1.0/FONT_BLOCK_SIZE, row + uv.y * 1.0/FONT_BLOCK_SIZE);
38 |     float4 val = fontTexture.SampleLevel(fontSampler, samplePos, 0.0);
39 |     return float4(val.rgb, val.r > 0.5 ? 1.0 : 0.0);
40 | }
41 | 
42 | }
43 | 
44 | #endif
45 | 


--------------------------------------------------------------------------------
/grr/debug_font.py:
--------------------------------------------------------------------------------
1 | import coalpy.gpu as g
2 | 
3 | font_sampler = g.Sampler(filter_type = g.FilterType.Linear)  # module-level sampler with linear filtering, created once at import time
4 | font_texture = g.Texture(file = "data/debug_font.jpg")  # module-level texture loaded from a relative path; NOTE(review): confirm how coalpy resolves relative texture paths
5 | 


--------------------------------------------------------------------------------
/grr/default_scenes.py:
--------------------------------------------------------------------------------
 1 | from . import gpugeo
 2 | 
 3 | data = {  # built-in scenes keyed by name; a value is either a relative .obj path string or a callable
 4 |     "teapot": "data/default-scenes/teapot/teapot.obj",
 5 |     "cube": "data/default-scenes/cube/cube.obj",
 6 |     "sponza": "data/default-scenes/sponza/sponza.obj",
 7 |     "dragon": "data/default-scenes/dragon/dragon.obj",
 8 |     "simple_triangle" : gpugeo.GpuGeo.load_simple_triangle  # not a file path: the GpuGeo function itself is stored as the value
 9 | }
10 | 


--------------------------------------------------------------------------------
/grr/depth_utils.hlsl:
--------------------------------------------------------------------------------
 1 | #ifndef __DEPTH_UTILS__
 2 | #define __DEPTH_UTILS__
 3 | 
 4 | #ifndef INVERTED_DEPTH
 5 | #define INVERTED_DEPTH 1
 6 | #endif
 7 | 
 8 | #if INVERTED_DEPTH
 9 | 
10 | #define MAX_DEPTH 0.0
11 | #define MIN_DEPTH 1.0
12 | #define InterlockedMaxDepth(a,b,c) InterlockedMin(a,b,c)
13 | #define InterlockedMinDepth(a,b,c) InterlockedMax(a,b,c)
14 | #define IsDepthLess(a,b) a > b
15 | #define IsDepthLessOrEqual(a,b) a >= b
16 | #define IsDepthGreater(a,b) a < b
17 | #define IsDepthGreaterOrEqual(a,b) a <= b
18 | 
19 | #else
20 | 
21 | #define MAX_DEPTH 1.0
22 | #define MIN_DEPTH 0.0
23 | #define InterlockedMaxDepth(a,b,c) InterlockedMax(a,b,c)
24 | #define InterlockedMinDepth(a,b,c) InterlockedMin(a,b,c)
25 | #define IsDepthLess(a,b) a < b
26 | #define IsDepthLessOrEqual(a,b) a <= b
27 | #define IsDepthGreater(a,b) a > b
28 | #define IsDepthGreaterOrEqual(a,b) a >= b
29 | 
30 | #endif
31 | 
32 | #endif
33 | 


--------------------------------------------------------------------------------
/grr/editor.py:
--------------------------------------------------------------------------------
  1 | import coalpy.gpu as g
  2 | import inspect
  3 | import numpy as np
  4 | import os.path
  5 | import sys
  6 | import pathlib
  7 | import pywavefront
  8 | import json
  9 | from . import gpugeo
 10 | from . import default_scenes as scenes
 11 | from . import get_module_path
 12 | from . import camera as c
 13 | from . import transform as t
 14 | from . import profiler as profiler
 15 | from . import coverage_lut_tool
 16 | from . import vec
 17 | 
 18 | 
 19 | class EditorPanel:
 20 |     def __init__(self, name, state):
 21 |         self.name = name
 22 |         self.state = state
 23 | 
 24 | class EditorViewport:
 25 | 
 26 |     def __init__(self, id):
 27 |         self.m_name = "Viewport " + str(id)
 28 |         self.m_texture = None
 29 |         self.m_width = 1920
 30 |         self.m_height = 1080
 31 |         self.m_active = True
 32 |         self.m_is_focused = False
 33 |         self.m_id = id
 34 | 
 35 |         #camera data
 36 |         self.m_editor_camera = c.Camera(1920, 1080)
 37 |         self.reset_camera()
 38 | 
 39 |         #input state
 40 |         self.m_right_pressed = False
 41 |         self.m_left_pressed = False
 42 |         self.m_top_pressed = False
 43 |         self.m_bottom_pressed = False
 44 |         self.m_can_move_pressed = False
 45 |         self.m_can_orbit_pressed = False
 46 |         self.m_last_mouse = (0.0, 0.0)
 47 | 
 48 |         #camera settings
 49 |         self.m_cam_move_speed = 4.0
 50 |         self.m_cam_rotation_speed = 0.1
 51 |         self.m_last_mouse = (0, 0)
 52 |         self.m_curr_mouse = (0, 0)
 53 | 
 54 |         #debug tile settings
 55 |         self.m_debug_coarse_tiles = False
 56 |         self.m_debug_fine_tiles = False
 57 | 
 58 |     def save_editor_state(self):
 59 |         return {
 60 |             'id' : self.m_id,
 61 |             'name' : self.m_name,
 62 |             'debug_coarse_tiles' : self.m_debug_coarse_tiles,
 63 |             'debug_fine_tiles' : self.m_debug_fine_tiles
 64 |         }
 65 | 
 66 |     def load_editor_state(self, json):
 67 |         self.m_id = json['id']
 68 |         self.m_name = json['name']
 69 |         self.m_debug_coarse_tiles = json['debug_coarse_tiles'] if 'debug_coarse_tiles' in json else False
 70 |         self.m_debug_fine_tiles = json['debug_fine_tiles'] if 'debug_fine_tiles' in json else False
 71 | 
 72 |     def build_ui(self, imgui: g.ImguiBuilder):
 73 |         self.m_active = imgui.begin(self.m_name, self.m_active)
 74 |         (cr_min_w, cr_min_h) = imgui.get_window_content_region_min()
 75 |         (cr_max_w, cr_max_h) = imgui.get_window_content_region_max()
 76 |         (nw, nh) = (int(cr_max_w - cr_min_w), int(cr_max_h - cr_min_h))
 77 |         self.m_is_focused = imgui.is_window_focused(flags = g.ImGuiFocusedFlags.RootWindow)
 78 |         if (self.m_active):
 79 |             self._update_inputs(imgui)
 80 |             #update viewport texture
 81 |             if (nw > 0 and nh > 0 and (self.m_texture == None or self.m_width != nw or self.m_height != nh)):
 82 |                 self.m_width = nw;
 83 |                 self.m_height = nh;
 84 |                 self.m_texture = g.Texture(
 85 |                     name = self.m_name, width = self.m_width, height = self.m_height,
 86 |                     format = g.Format.RGBA_8_UNORM)
 87 | 
 88 |             if (self.m_texture != None):
 89 |                 imgui.image(
 90 |                     texture = self.m_texture,
 91 |                     size = (self.m_width, self.m_height))
 92 |         imgui.end() 
 93 |         return self.m_active
 94 | 
 95 |     def reset_camera(self):
 96 |         initial_pos = vec.float3(0, 0, -20)
 97 |         cam = self.m_editor_camera
 98 |         cam.pos = initial_pos
 99 |         cam.rotation = vec.q_from_angle_axis(0, vec.float3(1, 0, 0))
100 |         cam.focus_distance = vec.veclen(initial_pos)
101 |         cam.fov = 20 * t.to_radians()
102 |         cam.near = 0.01
103 |         cam.far = 10000
104 |         cam.update_mats()
105 | 
106 |     def _rotate_transform_mouse_control(self, target_transform, curr_mouse, delta_time, x_axis_sign = 1.0, y_axis_sign = 1.0):
107 |         rot_vec = delta_time * self.m_cam_rotation_speed * vec.float3(curr_mouse[0] - self.m_last_mouse[0], curr_mouse[1] - self.m_last_mouse[1], 0.0)
108 |         y_axis = vec.float3(0, 1, 0)
109 |         qx = vec.q_from_angle_axis(-np.sign(x_axis_sign * rot_vec[0]) * (np.abs(rot_vec[0]) ** 1.2), y_axis)
110 |         target_transform.rotation = (qx * target_transform.rotation)
111 |         
112 |         x_axis = target_transform.right
113 |         qy = vec.q_from_angle_axis(np.sign(y_axis_sign * rot_vec[1]) * (np.abs(rot_vec[1]) ** 1.2), x_axis)
114 |         target_transform.rotation = (qy * target_transform.rotation)
115 | 
116 |     def _get_rel_mouse(self, imgui: g.ImguiBuilder):
117 |         if self.m_width == 0 or self.m_height == 0:
118 |             return (0, 0)
119 |         (ax, ay) = imgui.get_mouse_pos()
120 |         (wx, wy) = imgui.get_cursor_screen_pos()
121 |         return (((ax - wx) + 0.5)/self.m_width, ((ay - wy) + 0.5)/self.m_height)
122 | 
123 |     def _update_inputs(self, imgui : g.ImguiBuilder):
124 |         curr_mouse_pos = self._get_rel_mouse(imgui)
125 |         is_right_click = imgui.is_mouse_down(g.ImGuiMouseButton.Right)
126 | 
127 |         #if is_right_click and curr_mouse_pos[0] >= 0.0 and curr_mouse_pos[0] <= 1.0 and curr_mouse_pos[1] >= 0.0 and curr_mouse_pos[1] <= 1.0:
128 |         if is_right_click and imgui.is_window_hovered():
129 |             imgui.set_window_focus()
130 | 
131 |         if not self.m_is_focused:
132 |             self.m_can_move_pressed = False
133 |             self.m_can_orbit_pressed = False
134 |             return
135 | 
136 |         self.m_right_pressed = imgui.is_key_down(g.ImGuiKey.D)
137 |         self.m_left_pressed = imgui.is_key_down(g.ImGuiKey.A)
138 |         self.m_top_pressed = imgui.is_key_down(g.ImGuiKey.W)
139 |         self.m_bottom_pressed = imgui.is_key_down(g.ImGuiKey.S)
140 |         prev_move_pressed = self.m_can_move_pressed
141 |         prev_orbit_pressed = self.m_can_orbit_pressed
142 |         self.m_can_move_pressed =  is_right_click
143 |         self.m_can_orbit_pressed =  imgui.is_key_down(g.ImGuiKey.LeftAlt) and imgui.is_mouse_down(g.ImGuiMouseButton.Left)
144 |         if prev_move_pressed != self.m_can_move_pressed or prev_orbit_pressed != self.m_can_orbit_pressed:
145 |             self.m_curr_mouse = curr_mouse_pos
146 |             self.m_last_mouse = self.m_curr_mouse
147 | 
148 |         if self.m_can_move_pressed or self.m_can_orbit_pressed:
149 |             self.m_last_mouse = self.m_curr_mouse
150 |             self.m_curr_mouse = curr_mouse_pos
151 | 
152 |     def update(self, delta_time):
153 |         self.m_editor_camera.w = self.m_width
154 |         self.m_editor_camera.h = self.m_height
155 |         if (self.m_can_move_pressed):
156 |             new_pos = self.m_editor_camera.pos
157 |             zero = vec.float3(0, 0, 0)
158 |             cam_transform = self.m_editor_camera.transform
159 |             new_pos = new_pos - ((cam_transform.right * self.m_cam_move_speed) if self.m_right_pressed  else zero)
160 |             new_pos = new_pos + ((cam_transform.right * self.m_cam_move_speed) if self.m_left_pressed   else zero)
161 |             new_pos = new_pos + ((cam_transform.front * self.m_cam_move_speed   ) if self.m_top_pressed    else zero)
162 |             new_pos = new_pos - ((cam_transform.front * self.m_cam_move_speed   ) if self.m_bottom_pressed else zero)
163 |             self.m_editor_camera.pos = new_pos
164 |             self._rotate_transform_mouse_control(cam_transform, self.m_curr_mouse, delta_time)
165 |             self.m_last_mouse = (self.m_curr_mouse[0], self.m_curr_mouse[1])
166 |         elif (self.m_can_orbit_pressed):
167 |             lookat_pos = self.m_editor_camera.focus_point
168 |             lookat_dist = self.m_editor_camera.focus_distance
169 |             cam_transform = self.m_editor_camera.transform
170 |             self._rotate_transform_mouse_control(cam_transform, self.m_curr_mouse, delta_time, -1.0)
171 |             cam_transform.translation = lookat_pos - lookat_dist * cam_transform.front
172 |             cam_transform.update_mats()
173 |             self.m_last_mouse = (self.m_curr_mouse[0], self.m_curr_mouse[1])
174 |             
175 |     @property
176 |     def camera(self):
177 |         return self.m_editor_camera
178 | 
179 |     @property
180 |     def width(self):
181 |         return self.m_width
182 | 
183 |     @property
184 |     def height(self):
185 |         return self.m_height
186 |     
187 |     @property
188 |     def texture(self):
189 |         return self.m_texture
190 | 
191 |     @property
192 |     def id(self):
193 |         return self.m_id
194 | 
195 |     @property
196 |     def name(self):
197 |         return self.m_name
198 | 
199 |     @property
200 |     def is_focused(self):
201 |         return self.m_is_focused
202 | 
203 |     @property
204 |     def debug_coarse_tiles(self):
205 |         return self.m_debug_coarse_tiles
206 | 
207 |     @debug_coarse_tiles.setter
208 |     def debug_coarse_tiles(self, value):
209 |         self.m_debug_coarse_tiles = value
210 | 
211 |     @property
212 |     def debug_fine_tiles(self):
213 |         return self.m_debug_fine_tiles
214 | 
215 |     @debug_fine_tiles.setter
216 |     def debug_fine_tiles(self, value):
217 |         self.m_debug_fine_tiles = value
218 |     
class Editor:
    """Top-level editor UI: menu bar, tool panels, viewports and scene loading.

    UI state (which tool panels are open, which viewports exist) can be
    persisted to and restored from ``editor_state.json`` in the current
    working directory.
    """

    def __init__(self, geo : "gpugeo.GpuGeo", default_scene : str):
        """Create the editor and load the built-in teapot scene into ``geo``.

        NOTE(review): ``default_scene`` is only stored in ``m_active_scene``
        and the scene actually loaded is always ``scenes.data['teapot']`` -
        confirm whether the argument was meant to select the initial scene.
        """
        self.m_active_scene_name = None
        self.m_geo = geo
        self.m_active_scene = default_scene
        self.m_set_default_layout = False
        self.m_ui_frame_it = 0
        self.m_selected_viewport = None
        self.m_viewports = {}
        self.m_profiler = profiler.Profiler()
        self.m_coverage_lut_tool = coverage_lut_tool.CoverageLUTTool()

        self.m_tools = self.createToolPanels()
        self.m_active_scene_name = get_module_path() + scenes.data['teapot']
        self.reload_scene()

    def createToolPanels(self):
        """Return the tool panels keyed by their internal identifier, all closed."""
        return {
            'view_panel' : EditorPanel("View Settings", False),
            'profiler' : EditorPanel("Profiler", False),
            'coverage_lut_tool' : EditorPanel("Coverage LUT Tool", False)
        }

    def save_editor_state(self):
        """Write tool and viewport state to ``editor_state.json`` (best effort).

        Failures are reported on stdout and never raised to the caller.
        """
        state = {
            'tools_states' : [(k, v.state) for (k, v) in self.m_tools.items()],
            'viewport_states' : [vp.save_editor_state() for vp in self.m_viewports.values()]
        }
        try:
            # Serialize before opening so a serialization error cannot leave a
            # truncated state file behind.
            payload = json.dumps(state)
            with open('editor_state.json', "w") as f:
                f.write(payload)
        except Exception as err:
            print("[Editor]: error saving state: " + str(err))

    def load_editor_state(self):
        """Restore tool and viewport state from ``editor_state.json`` if present.

        Unknown tool names are ignored. Failures are reported on stdout and
        never raised to the caller.
        """
        try:
            if not os.path.exists('editor_state.json'):
                return

            with open('editor_state.json', "r") as f:
                state = json.loads(f.read())

            if 'tools_states' in state:
                for (tn, tstate) in state['tools_states']:
                    if tn in self.m_tools:
                        self.m_tools[tn].state = tstate
            if 'viewport_states' in state:
                for vp_json in state['viewport_states']:
                    # The placeholder id is overwritten by load_editor_state.
                    new_vp = EditorViewport(0)
                    new_vp.load_editor_state(vp_json)
                    self.m_viewports[new_vp.id] = new_vp
        except Exception as err:
            print("[Editor]: error loading state: " + str(err))

    def build_menu_bar(self, imgui : "g.ImguiBuilder"):
        """Submit the main menu bar (File / Tools / Window) and react to clicks."""
        if not imgui.begin_main_menu_bar():
            return

        if imgui.begin_menu("File"):
            if imgui.begin_menu("Open"):
                if imgui.begin_menu("Default Scenes"):
                    # Every item has to be submitted each frame, so evaluate
                    # all of them before acting on the first clicked one.
                    clicked = [nm for nm in scenes.data if imgui.menu_item(nm)]
                    if clicked:
                        scene_data = scenes.data[clicked[0]]
                        if inspect.isfunction(scene_data):
                            # Function entries fill the geometry directly.
                            self.m_active_scene_name = "Procedural"
                            self.m_active_scene = None
                            scene_data(self.m_geo)
                        else:
                            self.m_active_scene_name = get_module_path() + scene_data
                            self.reload_scene()
                    imgui.end_menu()
                imgui.end_menu()
            imgui.end_menu()

        if imgui.begin_menu("Tools"):
            # Clicking a tool only ever opens it; closing happens in the panel.
            for tool in self.m_tools.values():
                if imgui.menu_item(label = tool.name):
                    tool.state = True
            imgui.end_menu()

        if imgui.begin_menu("Window"):
            if imgui.menu_item(label = "New Viewport"):
                next_id = max((vp.id for vp in self.m_viewports.values()), default = -1) + 1
                self.m_viewports[next_id] = EditorViewport(next_id)
            if imgui.menu_item(label = "Reset Layout"):
                self.m_set_default_layout = True
            imgui.end_menu()

        imgui.end_main_menu_bar()

    def build_view_settings_panel(self, imgui : "g.ImguiBuilder"):
        """Submit the "View Settings" panel for the selected viewport, if open."""
        panel = self.m_tools['view_panel']
        if not panel.state:
            return

        panel.state = imgui.begin(panel.name, panel.state)
        viewport = self.m_selected_viewport
        if viewport is not None:
            if imgui.collapsing_header("Camera", g.ImGuiTreeNodeFlags.DefaultOpen):
                cam = viewport.camera
                imgui.text(viewport.name)
                cam.fov = imgui.slider_float(label="fov", v=cam.fov, v_min=0.01 * np.pi, v_max=0.7 * np.pi)
                cam.near = imgui.slider_float(label="near", v=cam.near, v_min=0.001, v_max=8.0)
                cam.far = imgui.slider_float(label="far", v=cam.far, v_min=10.0, v_max=90000)

                nx = cam.transform.translation[0]
                ny = cam.transform.translation[1]
                nz = cam.transform.translation[2]
                (nx, ny, nz) = imgui.input_float3(label="pos", v=[nx, ny, nz])
                cam.transform.translation = [nx, ny, nz]
                if imgui.button("reset"):
                    viewport.reset_camera()

                viewport.m_cam_move_speed = imgui.slider_float(label="moving speed", v = viewport.m_cam_move_speed, v_min = 0.01, v_max = 16.0)

            if imgui.collapsing_header("Debug", g.ImGuiTreeNodeFlags.DefaultOpen):
                viewport.debug_coarse_tiles = imgui.checkbox(label = "Show coarse tiles", v = viewport.debug_coarse_tiles)
                viewport.debug_fine_tiles = imgui.checkbox(label = "Show fine tiles", v = viewport.debug_fine_tiles)

        # The coverage LUT tool contributes its properties to this panel too.
        if self.m_coverage_lut_tool.active:
            self.m_coverage_lut_tool.build_ui_properties(imgui)

        imgui.end()

    def build_profiler(self, imgui : "g.ImguiBuilder", implot : "g.ImplotBuilder"):
        """Submit the profiler panel when open and mirror its open state back."""
        panel = self.m_tools['profiler']
        if not panel.state:
            return

        self.m_profiler.active = True
        self.m_profiler.build_ui(imgui, implot)
        panel.state = self.m_profiler.active

    def build_coverage_lut_tool(self, imgui : "g.ImguiBuilder"):
        """Submit the coverage LUT tool when open and mirror its open state back."""
        panel = self.m_tools['coverage_lut_tool']
        if not panel.state:
            return

        self.m_coverage_lut_tool.active = True
        self.m_coverage_lut_tool.build_ui(imgui)
        panel.state = self.m_coverage_lut_tool.active

    @property
    def viewports(self):
        """View over the currently open viewports."""
        return self.m_viewports.values()

    @property
    def profiler(self):
        """The profiler instance owned by the editor."""
        return self.m_profiler

    def setup_default_layout(self, root_d_id, imgui : "g.ImguiBuilder"):
        """Build the default dock layout under ``root_d_id``.

        Runs only on the first UI frame when no ImGui settings were loaded, or
        whenever a layout reset was requested. Ensures viewport 0 exists and
        opens the view settings and profiler panels.
        """
        settings_loaded = imgui.settings_loaded()
        if (settings_loaded or self.m_ui_frame_it > 0) and not self.m_set_default_layout:
            return

        if 0 not in self.m_viewports:
            self.m_viewports[0] = EditorViewport(0)

        imgui.dockbuilder_remove_child_nodes(root_d_id)
        (panel_node, _, rest_node) = imgui.dockbuilder_split_node(node_id=root_d_id, split_dir = g.ImGuiDir.Left, split_ratio = 0.2)
        view_panel = self.m_tools['view_panel']
        view_panel.state = True
        self.m_tools['profiler'].state = True
        imgui.dockbuilder_dock_window(view_panel.name, panel_node)
        (profiler_node, _, viewport_node) = imgui.dockbuilder_split_node(node_id=rest_node, split_dir = g.ImGuiDir.Down, split_ratio = 0.2)
        imgui.dockbuilder_dock_window("Viewport 0", viewport_node)
        imgui.dockbuilder_dock_window("Profiler", profiler_node)
        imgui.dockbuilder_finish(root_d_id)
        self.m_set_default_layout = False

    def build_ui(self, imgui : "g.ImguiBuilder", implot : "g.ImplotBuilder"):
        """Submit the whole editor UI for one frame."""
        root_d_id = imgui.get_id("RootDock")
        imgui.begin(name="MainWindow", is_fullscreen = True)
        imgui.dockspace(dock_id=root_d_id)
        imgui.end()

        self.build_menu_bar(imgui)
        self.build_view_settings_panel(imgui)
        self.build_profiler(imgui, implot)
        self.build_coverage_lut_tool(imgui)

        # Iterate over a snapshot: closed viewports are removed while looping.
        for vp in list(self.m_viewports.values()):
            if not vp.build_ui(imgui):
                if vp is self.m_selected_viewport:
                    self.m_selected_viewport = None
                del self.m_viewports[vp.id]

        # The focused viewport becomes the selection; otherwise keep the old one.
        focused = next((x for x in self.viewports if x.is_focused), None)
        if focused is not None:
            self.m_selected_viewport = focused
        self.setup_default_layout(root_d_id, imgui)
        self.m_ui_frame_it = self.m_ui_frame_it + 1

    def reload_scene(self):
        """Parse ``m_active_scene_name`` as a Wavefront file and register it.

        Does nothing when no scene name is set. Parse or registration errors
        are reported on stdout and never raised to the caller.
        """
        if self.m_active_scene_name is None:
            return
        print ("[Editor]: loading scene: " + '"' + self.m_active_scene_name + '"')
        try:
            self.m_active_scene = pywavefront.Wavefront(file_name= self.m_active_scene_name, create_materials=True, collect_faces=True)
            self.m_geo.register_wavefront_obj(self.m_active_scene)
        except Exception as err:
            print ("[Editor]: failed parsing scene, reason: " + str(err))

    def render_tools(self):
        """Render the coverage LUT tool when it is active."""
        if not self.m_coverage_lut_tool.active:
            return
        self.m_coverage_lut_tool.render()
428 | 
429 |         
430 | 
431 |         
432 |  
433 | 


--------------------------------------------------------------------------------
/grr/geometry.hlsl:
--------------------------------------------------------------------------------
  1 | #ifndef __GEOMETRY__
  2 | #define __GEOMETRY__
  3 | 
//Geometry file with utilities and definitions.
  5 | #include "depth_utils.hlsl"
  6 | 
  7 | namespace geometry
  8 | {
    //------------------------------------------
    // Geometric declarations
    //------------------------------------------
    // Forward declarations of the types and free functions defined further
    // down in this namespace, so they can be referenced before their bodies.
    struct TriangleI;
    struct TriangleV;
    struct TriangleH;
    struct AABB;

    // Signed area of the 2d triangle (a, b, c).
    float triangleArea(float2 a, float2 b, float2 c);
    // Barycentric weights of p with respect to the 2d triangle (a, b, c).
    float3 computeBaryCoord(float2 a, float2 b, float2 c, float2 p);
    // Same as computeBaryCoord, with each weight divided by the .z of its
    // vertex and the result renormalized.
    float3 computeBaryCoordPerspective(float3 aw, float3 bw, float3 cw, float2 p);
    // Separating-axis overlap test between a triangle and an AABB.
    bool intersectsSAT(in TriangleH tri, in AABB aabb);
 21 | 
 22 |     //-----------------------------------------
 23 |     // Triangle / geometric types
 24 |     //-----------------------------------------
 25 | 
    // Vertex as used by the triangle types below. Only the position is
    // active; the normal and uv members are commented out.
    struct Vertex
    {
        float3 p; // position, filled by TriangleV::loadVertex
        //float3 n;
        //float2 uv;
    };
 32 | 
 33 |     //Triangle with indices.
 34 |     struct TriangleI
 35 |     {
 36 |         int a;
 37 |         int b;
 38 |         int c;
 39 |     
 40 |         void load(Buffer<int> indices, int triangleId)
 41 |         {
 42 |             int i = 3 * triangleId;
 43 |             a = indices[i + 0];
 44 |             b = indices[i + 1];
 45 |             c = indices[i + 2];
 46 |         }
 47 |     };
 48 | 
 49 |     // Triangle with Vertices. Vertices are resolved already in registers
 50 |     struct TriangleV
 51 |     {
 52 |         Vertex a;
 53 |         Vertex b;
 54 |         Vertex c;
 55 | 
 56 |         Vertex loadVertex(ByteAddressBuffer vertBuffer, int index)
 57 |         {
 58 |             Vertex v;
 59 |             v.p = asfloat(vertBuffer.Load3((index * 3)  << 2));
 60 |             return v;
 61 |         }
 62 | 
 63 |         void load(ByteAddressBuffer vertices, in TriangleI indices)
 64 |         {
 65 |             a = loadVertex(vertices, indices.a);
 66 |             b = loadVertex(vertices, indices.b);
 67 |             c = loadVertex(vertices, indices.c);
 68 |         }
 69 |     };
 70 | 
 71 |     // Interpolation result with 3 baricenters. See TriangleH::interp
 72 |     struct TriInterpResult
 73 |     {
 74 |         bool isBackface;
 75 |         bool isFrontface;
 76 |         bool visible;
 77 |         float3 bari;
 78 | 
 79 |         float3 eval(float3 a, float3 b, float3 c)
 80 |         {
 81 |             return a * bari.x + b * bari.y + c * bari.z;
 82 |         }
 83 | 
 84 |         float eval(float a, float b, float c)
 85 |         {
 86 |             return a * bari.x + b * bari.y + c * bari.z;
 87 |         }
 88 |     };
 89 | 
 90 |     // Represents a 3d bounding box
 91 |     struct AABB
 92 |     {
 93 |         float3 begin;
 94 |         float3 end;
 95 | 
 96 |         float3 center()
 97 |         {
 98 |             return (begin + end) * 0.5;
 99 |         }
100 | 
101 |         float3 size()
102 |         {
103 |             return end - begin;
104 |         }
105 | 
106 |         float3 extents()
107 |         {
108 |             return size() * 0.5;
109 |         }
110 | 
111 |         bool intersects(AABB other)
112 |         {
113 |             return all(begin < other.end) && all(other.begin < end);
114 |         }
115 |     };
116 | 
    // Represents a single transformed triangle in homogeneous coordinates.
    // init() transforms the three vertices by view then proj, optionally
    // adjusts them in clipVert, and caches the post-divide points p0..p2.
    struct TriangleH
    {
        // Homogeneous coordinates before perspective correction
        // (these may be modified by clipVert during init)
        float4 h0;
        float4 h1;
        float4 h2;

        // Copies of h0..h2 taken in init() before clipVert runs,
        // i.e. the unmodified transformed vertices.
        float4 og0;
        float4 og1;
        float4 og2;

        // Homogeneous screen coordinates after division by W
        float3 p0;
        float3 p1;
        float3 p2;

        // Bit i is set when clipVert modified vertex i (bit 0 = h0, 1 = h1, 2 = h2).
        uint clipZMask;

        // Transforms tri by view and proj, records the original vertices,
        // runs clipVert on each vertex and computes p0..p2.
        void init(TriangleV tri, float4x4 view, float4x4 proj)
        {
            h0 = mul(mul(float4(tri.a.p.xyz, 1.0), view), proj);
            h1 = mul(mul(float4(tri.b.p.xyz, 1.0), view), proj);
            h2 = mul(mul(float4(tri.c.p.xyz, 1.0), view), proj);

            og0 = h0;
            og1 = h1;
            og2 = h2;
            clipZMask = 0;
            // NOTE(review): h0 and h1 are both clipped against og2, h2 against og1;
            // og0 is never used as the reference vertex. Confirm this is intended.
            clipZMask |= clipVert(og2, h0, 1) << 0; 
            clipZMask |= clipVert(og2, h1, 1) << 1; 
            clipZMask |= clipVert(og1, h2, 1) << 2; 
            calculatePoints();
        }

        // Leaves h untouched and returns 0 when h.z < h.w. Otherwise sets h.w to
        // MIN_DEPTH, moves h.xy by s * (d(xy)/d(w) between h and dominantVert) * (MIN_DEPTH - old w)
        // and returns 1.
        // NOTE(review): divides by (h.w - dominantVert.w) with no guard for equal w.
        uint clipVert(in float4 dominantVert, inout float4 h, float s)
        {
            if (h.z < h.w)
                return 0;
            
            float dw = MIN_DEPTH - h.w;
            float2 a = (h.xy - dominantVert.xy)/(h.w - dominantVert.w);
            float ogDepth = h.w; // NOTE(review): unused
            h.w = MIN_DEPTH;
            h.xy += s * a * dw;
            return 1;
        }

        // Perspective divide of the (possibly clipped) vertices.
        void calculatePoints()
        {
            p0 = h0.xyz / h0.w;
            p1 = h1.xyz / h1.w;
            p2 = h2.xyz / h2.w;
        }

        // Evaluates the triangle at hCoords (same 2d space as p0..p2.xy).
        // Facing comes from the three edge functions of p0..p2: all positive
        // marks a front face, all negative a back face. The barycentric weights
        // are computed from the unclipped og0..og2 vertices.
        TriInterpResult interp(float2 hCoords)
        {
            float2 ea = p1.xy - p0.xy;
            float2 eb = p2.xy - p1.xy;
            float2 ec = p0.xy - p2.xy;

            float2 pa = hCoords - p0.xy;
            float2 pb = hCoords - p1.xy;
            float2 pc = hCoords - p2.xy;

            // 2d cross products: sign tells on which side of each edge the point lies.
            float wa = ea.x * pa.y - ea.y * pa.x;
            float wb = eb.x * pb.y - eb.y * pb.x;
            float wc = ec.x * pc.y - ec.y * pc.x;

            float backFace = -max(wa, max(wb, wc));
            float frontFace = min(wa, min(wb, wc));

            TriInterpResult result;
            result.bari = computeBaryCoordPerspective(float3(og0.xy/og0.w,og0.w), float3(og1.xy/og1.w,og1.w), float3(og2.xy/og2.w,og2.w), hCoords);
            //result.bari = computeBaryCoordPerspective(float3(p0.xy,h0.w), float3(p1.xy,h1.w), float3(p2.xy,h2.w), hCoords);
            result.isBackface = backFace > 0.0;
            result.isFrontface = frontFace > 0.0;
            result.visible = result.isBackface || result.isFrontface;
            return result;
        }

        // Bounding box of the post-divide points p0..p2.
        AABB aabb()
        {
            AABB val;
            val.begin = min(p0, min(p1, p2));
            val.end = max(p0, max(p1, p2));
            return val;
        }
    };
206 | 
207 |     //-----------------------------
208 |     // Implementations of functions
209 |     //-----------------------------
210 | 
211 |     float triangleArea(float2 a, float2 b, float2 c)
212 |     {
213 |         return 0.5 * (a.x * (b.y - c.y) + b.x * (c.y - a.y) + c.x * (a.y - b.y));
214 |     }
215 | 
216 |     float3 computeBaryCoord(float2 a, float2 b, float2 c, float2 p)
217 |     {
218 |         float totalArea = triangleArea(a, b, c);
219 |         float area1 = triangleArea(b, c, p);
220 |         float area2 = triangleArea(c, a, p);
221 |         float2 bari = float2(area1 / totalArea, area2 / totalArea);
222 |         return float3(bari.x, bari.y, 1.0 - (bari.x + bari.y));
223 |     }
224 | 
225 |     float3 computeBaryCoordPerspective(float3 aw, float3 bw, float3 cw, float2 p)
226 |     {
227 |         float3 b = computeBaryCoord(aw.xy, bw.xy, cw.xy, p);
228 |         float3 B = float3(b.x / aw.z, b.y / bw.z, b.z / cw.z);
229 |         B /= (B.x + B.y + B.z);
230 |         return B;
231 |     }
232 | 
233 |     bool intersectsSATAxis(float3 aabbExtents, float3 axis, float3 v0, float3 v1, float3 v2)
234 |     {
235 |         // Compute the face normals of the AABB, because the AABB
236 |         // is at center, and of course axis aligned, we know that 
237 |         // it's normals are the X, Y and Z axis.
238 |         const float3 u0 = float3(1.0f, 0.0f, 0.0f);
239 |         const float3 u1 = float3(0.0f, 1.0f, 0.0f);
240 |         const float3 u2 = float3(0.0f, 0.0f, 1.0f);
241 | 
242 |         // Testing axis: axis_u0_f0
243 |         // Project all 3 vertices of the triangle onto the Seperating axis
244 |         float p0 = dot(v0, axis);
245 |         float p1 = dot(v1, axis);
246 |         float p2 = dot(v2, axis);
247 | 
248 |         // Project the AABB onto the seperating axis
249 |         // We don't care about the end points of the prjection
250 |         // just the length of the half-size of the AABB
251 |         // That is, we're only casting the extents onto the 
252 |         // seperating axis, not the AABB center. We don't
253 |         // need to cast the center, because we know that the
254 |         // aabb is at origin compared to the triangle!
255 |         float r = aabbExtents.x * abs(dot(u0, axis)) +
256 |                     aabbExtents.y * abs(dot(u1, axis)) +
257 |                     aabbExtents.z * abs(dot(u2, axis));
258 | 
259 |         // Now do the actual test, basically see if either of
260 |         // the most extreme of the triangle points intersects r
261 |         // You might need to write Min & Max functions that take 3 arguments
262 |         if (max(-(max(p0, max(p1, p2))), min(p0, min(p1, p2))) > r) {
263 |             // This means BOTH of the points of the projected triangle
264 |             // are outside the projected half-length of the AABB
265 |             // Therefore the axis is seperating and we can exit
266 |             return false;
267 |         }
268 | 
269 |         return true;
270 |     }
271 | 
272 |     // Intersection between AABB and triangle using SAT algorithm.
273 |     // Translated from https://gdbooks.gitbooks.io/3dcollisions/content/Chapter4/aabb-triangle.html to glsl
274 |     bool intersectsSAT(in TriangleH tri, in AABB aabb)
275 |     {
276 |         // Get the triangle points as vectors
277 |         float3 v0 = tri.p0;
278 |         float3 v1 = tri.p1;
279 |         float3 v2 = tri.p2;
280 | 
281 |         // Convert AABB to center-extents form
282 |         float3 c = aabb.center();
283 |         float3 e = aabb.extents();
284 | 
285 |         // Translate the triangle as conceptually moving the AABB to origin
286 |         // This is the same as we did with the point in triangle test
287 |         v0 -= c;
288 |         v1 -= c;
289 |         v2 -= c;
290 | 
291 |         // Compute the edge vectors of the triangle  (ABC)
292 |         // That is, get the lines between the points as vectors
293 |         float3 f0 = v1 - v0; // B - A
294 |         float3 f1 = v2 - v1; // C - B
295 |         float3 f2 = v0 - v2; // A - C
296 | 
297 |         // Compute the face normals of the AABB, because the AABB
298 |         // is at center, and of course axis aligned, we know that 
299 |         // it's normals are the X, Y and Z axis.
300 |         float3 u0 = float3(1.0f, 0.0f, 0.0f);
301 |         float3 u1 = float3(0.0f, 1.0f, 0.0f);
302 |         float3 u2 = float3(0.0f, 0.0f, 1.0f);
303 | 
304 |         // There are a total of 13 axis to test!
305 | 
306 |         // We first test against 9 axis, these axis are given by
307 |         // cross product combinations of the edges of the triangle
308 |         // and the edges of the AABB. You need to get an axis testing
309 |         // each of the 3 sides of the AABB against each of the 3 sides
310 |         // of the triangle. The result is 9 axis of seperation
311 |         // https://awwapp.com/b/umzoc8tiv/
312 | 
313 |         // Compute the 9 axis
314 |         float3 axis_u0_f0 = cross(u0, f0);
315 |         float3 axis_u0_f1 = cross(u0, f1);
316 |         float3 axis_u0_f2 = cross(u0, f2);
317 | 
318 |         float3 axis_u1_f0 = cross(u1, f0);
319 |         float3 axis_u1_f1 = cross(u1, f1);
320 |         float3 axis_u1_f2 = cross(u2, f2);
321 | 
322 |         float3 axis_u2_f0 = cross(u2, f0);
323 |         float3 axis_u2_f1 = cross(u2, f1);
324 |         float3 axis_u2_f2 = cross(u2, f2);
325 | 
326 |         if (!intersectsSATAxis(e, axis_u0_f0, v0, v1, v2))
327 |             return false;
328 |         if (!intersectsSATAxis(e, axis_u0_f1, v0, v1, v2))
329 |             return false;
330 |         if (!intersectsSATAxis(e, axis_u0_f2, v0, v1, v2))
331 |             return false;
332 |         if (!intersectsSATAxis(e, axis_u1_f0, v0, v1, v2))
333 |             return false;
334 |         if (!intersectsSATAxis(e, axis_u1_f1, v0, v1, v2))
335 |             return false;
336 |         if (!intersectsSATAxis(e, axis_u1_f2, v0, v1, v2))
337 |             return false;
338 |         if (!intersectsSATAxis(e, axis_u2_f1, v0, v1, v2))
339 |             return false;
340 |         if (!intersectsSATAxis(e, axis_u2_f2, v0, v1, v2))
341 |             return false;
342 |         if (!intersectsSATAxis(e, axis_u2_f2, v0, v1, v2))
343 |             return false;
344 | 
345 |         // Next, we have 3 face normals from the AABB
346 |         // for these tests we are conceptually checking if the bounding box
347 |         // of the triangle intersects the bounding box of the AABB
348 |         // that is to say, the seperating axis for all tests are axis aligned:
349 |         // axis1: (1, 0, 0), axis2: (0, 1, 0), axis3 (0, 0, 1)
350 |         if (!aabb.intersects(tri.aabb()))
351 |             return false;
352 | 
353 |         // Finally, we have one last axis to test, the face normal of the triangle
354 |         // We can get the normal of the triangle by crossing the first two line segments
355 |         float3 triangleNormal = cross(f0, f1);
356 |         if (!intersectsSATAxis(e, triangleNormal, v0, v1, v2))
357 |             return false;
358 | 
359 |         // Passed testing for all 13 seperating axis that exist!
360 |         return true;
361 |     }
362 | 
363 |     float2 pixelToUV(int2 pixelCoord, int2 screenSize)
364 |     {
365 |         float2 uv = (pixelCoord + 0.5) / (float2)screenSize.xy;
366 |         return uv;
367 |     }
368 | 
369 |     int2 uvToPixel(float2 uv, int2 screenSize)
370 |     {
371 |         return uv.xy * screenSize;
372 |     }
373 | 
374 |     float2 uvToH(float2 uv)
375 |     {
376 |         return float2(1,1) * (uv * 2.0 - 1.0);
377 |     }
378 | 
379 |     float2 hToUV(float2 hCoord)
380 |     {
381 |         float2 uv = (hCoord * float2(1,1)) * 0.5 + 0.5;
382 |         return uv;
383 |     }
384 | }
385 | 
386 | #endif
387 | 


--------------------------------------------------------------------------------
/grr/gpugeo.py:
--------------------------------------------------------------------------------
 1 | import coalpy.gpu as g
 2 | import array
 3 | import numpy as np
 4 | import math
 5 | 
 6 | class GpuGeo:
 7 | 
 8 |     # 32 megabytes.
 9 |     vertex_pool_byte_size = 32 * 1024 * 1024
10 | 
11 |     # 16 megabytes
12 |     index_pool_byte_size = 16 * 1024 * 1024
13 | 
14 |     # 3 floats (pos) + 3 floats (normal) + 2 floats (uv)
15 |     vertex_format_byte_size = ((4 * 3) + (4 * 3) +  (4 * 2))
16 | 
17 |     #32 bits for now
18 |     index_format_byte_size = 4
19 | 
20 |     def __init__(self):
21 | 
22 |         self.m_vertex_buffer = g.Buffer(
23 |             name ="global_vertex_buffer",
24 |             type = g.BufferType.Raw,
25 |             stride = 4,
26 |             element_count = math.ceil(GpuGeo.vertex_pool_byte_size/4)
27 |         )
28 | 
29 | 
30 |         self.m_index_buffer = g.Buffer(
31 |             name = "global_index_buffer",
32 |             type = g.BufferType.Standard,
33 |             format = g.Format.R32_UINT,
34 |             element_count = math.ceil(GpuGeo.index_pool_byte_size/GpuGeo.index_format_byte_size)
35 |         )
36 | 
37 |         self.triCounts = 0
38 | 
39 |     
40 |     #simple testing function
41 |     def load_simple_triangle(self):
42 |         tri_data = array.array('f', [
43 |              #v.x,  v.y,  v.z,    # uv.x, uv.y, n.x,  n.y,  n.z
44 |               -1.0,  1.0,  2.0,  # 0.0,  0.0,  0.0,  0.0,  1.0,
45 |                1.0,  1.0,  2.0,  # 1.0,  0.0,  0.0,  0.0,  1.0,
46 |                0.0,  -0.5,  -2.0,   # 0.5,  1.0,  0.0,  0.0,  1.0
47 | 
48 |              #v.x,  v.y,  v.z,    # uv.x, uv.y, n.x,  n.y,  n.z
49 |                1.0,  -0.5,  -4.0,  # 0.0,  0.0,  0.0,  0.0,  1.0,
50 |               -1.0,  -0.5,  -4.0,  # 1.0,  0.0,  0.0,  0.0,  1.0,
51 |                0.0,   1.0,  0.0    # 0.5,  1.0,  0.0,  0.0,  1.0
52 |         ])
53 | 
54 |         index_data = [0, 1, 2, 3, 4, 5]
55 | 
56 |         c = g.CommandList()
57 |         c.upload_resource(
58 |             source = tri_data,
59 |             destination = self.m_vertex_buffer            
60 |         )
61 | 
62 |         c.upload_resource(
63 |             source = index_data,
64 |             destination = self.m_index_buffer
65 |         )
66 | 
67 |         g.schedule(c)
68 |         self.triCounts = 2
69 | 
70 |     def register_wavefront_obj(self, wavefront_obj):
71 |         self.triCounts = 0
72 | 
73 |         try:
74 |             vertex_data = np.array(wavefront_obj.vertices, dtype='f')
75 |             index_data = np.array(wavefront_obj.mesh_list[0].faces, dtype='i')
76 |             c = g.CommandList()
77 |             c.upload_resource(source = vertex_data, destination = self.m_vertex_buffer)
78 |             c.upload_resource(source = index_data, destination = self.m_index_buffer)
79 |             g.schedule(c)
80 |             self.triCounts = len(wavefront_obj.mesh_list[0].faces) 
81 |         except Exception as err:
82 |             print("[gpugeo]: Failed uploading wavefront obj to GPU: " + str(err))
83 | 
84 |         
85 | 
86 | 


--------------------------------------------------------------------------------
/grr/overlay.py:
--------------------------------------------------------------------------------
 1 | import coalpy.gpu as g
 2 | import math
 3 | from . import raster
 4 | from . import debug_font
 5 | 
 6 | #enums, must match those in debug_cs.hlsl
 7 | class OverlayFlags:
 8 |     NONE = 0
 9 |     SHOW_COARSE_TILES = 1 << 0
10 |     SHOW_FINE_TILES = 1 << 1
11 | 
# Compute shader dispatched by render_overlay (entry point csMainOverlay in overlay_cs.hlsl).
g_overlay_shader = g.Shader(file = "overlay_cs.hlsl", name = "main_overlay", main_function = "csMainOverlay")
14 | 
def render_overlay(cmd_list, rasterizer, output_texture, view_settings):
    """Record the debug overlay pass into ``cmd_list``.

    Dispatches the overlay shader, one 8x8 group per 8x8 block of pixels of a
    ``view_settings.width`` x ``view_settings.height`` target, reading the
    rasterizer's visibility and binning buffers and writing ``output_texture``.
    ``view_settings.debug_coarse_tiles`` / ``debug_fine_tiles`` select which
    overlays are enabled. The pass is wrapped in an "overlay" marker.
    """
    width = view_settings.width
    height = view_settings.height

    cmd_list.begin_marker("overlay")

    # Number of coarse tiles needed to cover the target on each axis.
    coarse_tiles_x = math.ceil(width / raster.Rasterizer.coarse_tile_size)
    coarse_tiles_y = math.ceil(height / raster.Rasterizer.coarse_tile_size)

    flags = (
        (OverlayFlags.SHOW_COARSE_TILES if view_settings.debug_coarse_tiles else OverlayFlags.NONE)
        | (OverlayFlags.SHOW_FINE_TILES if view_settings.debug_fine_tiles else OverlayFlags.NONE)
    )

    # Layout must match the Constants cbuffer in overlay_cs.hlsl.
    shader_constants = [
        int(width), int(height), 0, 0,
        float(coarse_tiles_x), float(coarse_tiles_y), int(raster.Rasterizer.coarse_tile_size), int(flags)
    ]

    # Order must match the t0..t6 declarations in overlay_cs.hlsl.
    shader_inputs = [
        debug_font.font_texture,
        rasterizer.visibility_buffer,
        rasterizer.m_total_records_buffer,
        rasterizer.m_bin_counter_buffer,
        rasterizer.m_bin_offsets_buffer,
        rasterizer.m_bin_record_buffer,
        rasterizer.m_fine_tile_counter_buffer]

    cmd_list.dispatch(
        shader = g_overlay_shader,
        constants = shader_constants,
        inputs = shader_inputs,
        samplers = debug_font.font_sampler,
        outputs = output_texture,
        x = math.ceil(width / 8),
        y = math.ceil(height / 8),
        z = 1)
    cmd_list.end_marker()
50 |     
51 | 


--------------------------------------------------------------------------------
/grr/overlay_cs.hlsl:
--------------------------------------------------------------------------------
  1 | #include "raster_util.hlsl"
  2 | #include "geometry.hlsl"
  3 | #include "debug_font.hlsl"
  4 | 
  5 | #define OVERLAY_FLAGS_NONE 0
  6 | #define OVERLAY_FLAGS_SHOW_COARSE_TILES 1 << 0
  7 | #define OVERLAY_FLAGS_SHOW_FINE_TILES 1 << 1
  8 | 
// Resource bindings of the overlay pass.
// NOTE(review): t0..t6 presumably follow the order of the `inputs` list built
// in overlay.py (font texture, visibility buffer, total records, bin counters,
// bin offsets, bin records, fine tile counters) - confirm against coalpy.
SamplerState g_fontSampler : register(s0);

Texture2D<float4> g_debugFont : register(t0);
Texture2D<float4> g_colorBuffer : register(t1); // image the overlays are blended over
Buffer<uint> g_totalBins : register(t2);
Buffer<uint> g_binCounters : register(t3); // indexed by coarse tile id in csMainOverlay
Buffer<uint> g_binOffsets : register(t4);
StructuredBuffer<raster::BinIntersectionRecord> g_binOutputRecords : register(t5);
Buffer<uint> g_fineTileCounters : register(t6); // indexed by thread group (fine tile) in csMainOverlay

// Final image: color buffer plus overlays, written flipped vertically.
RWTexture2D<float4> g_output : register(u0);
 20 | 
// Look of the overlays.
// TILE_SIZE is the divisor that turns FONT_BLOCK_SIZE and pixel offsets into
// tile-relative sizes in drawTile.
#define TILE_SIZE 32.0
// Numerator of the border thickness computed in drawTile.
#define BORDER_PIXELS 1.0
// Colors are rgba; alpha is the blend weight used when compositing.
#define BORDER_COLOR float4(0.8, 0.8, 0.8, 0.3)
#define FONT_COLOR float4(0.8, 0.8, 0.8, 1.0)
#define TILE_COLOR float4(0, 0, 1.0, 0.3)
 26 | 
// Per-dispatch constants, filled by render_overlay in overlay.py.
cbuffer Constants : register(b0)
{
    int4 g_dims;               // xy = target width / height, zw are sent as 0
    float g_binTileX;          // coarse tile count along x: ceil(width / coarse tile size)
    float g_binTileY;          // coarse tile count along y: ceil(height / coarse tile size)
    int   g_binCoarseTileSize; // raster.Rasterizer.coarse_tile_size
    int   g_overlayFlags;      // bitmask of OVERLAY_FLAGS_*
}
 35 | 
 36 | float4 drawTile(int2 coord, int tileSize, int tileCount)
 37 | {
 38 |     float borderThickness = BORDER_PIXELS / FONT_BLOCK_SIZE;
 39 |     const int numberOfDigits = 4;
 40 |     float fontSquare = FONT_BLOCK_SIZE/TILE_SIZE;
 41 |     float2 fontBlock = float2(fontSquare * numberOfDigits, fontSquare);
 42 | 
 43 |     int2 tileCoord = int2(coord.x % tileSize, coord.y % tileSize);
 44 |     float2 tileUv = (tileCoord + 0.5) / (float)tileSize;
 45 |     tileUv.y = 1.0 - tileUv.y;
 46 |     float2 borderUvs = abs(tileUv * 2.0 - 1.0) - (1.0 - borderThickness);
 47 |     bool isBorder = any(borderUvs > 0.0);
 48 |     if (isBorder)
 49 |         return BORDER_COLOR;
 50 |     
 51 |     float2 fontTileUv = tileUv - 5.0/TILE_SIZE;
 52 |     bool isFont = all(fontTileUv < fontBlock);
 53 |     fontTileUv *= 1.5;
 54 |     float4 tileColor = TILE_COLOR;
 55 |     if (isFont)
 56 |     {
 57 |         float4 fontCol = Font::drawNumber(g_debugFont, g_fontSampler, fontTileUv / fontBlock, numberOfDigits, tileCount);
 58 |         float4 fontColShadow = Font::drawNumber(g_debugFont, g_fontSampler, (fontTileUv - 2.0 * 1.5/TILE_SIZE) / fontBlock, numberOfDigits, tileCount);
 59 |         tileColor.rgba = lerp(tileColor.rgba, float4(0,0,0,1), fontColShadow.a);
 60 |         tileColor.rgba = lerp(tileColor.rgba, fontCol.rgba, fontCol.a);
 61 |     }
 62 | 
 63 |     return tileColor;
 64 | }
 65 | 
 66 | float3 heatColor(float t)
 67 | {
 68 |     float r = t*t*t;
 69 |     float g = pow(abs(1.0 - abs(1.0 - (2.0 * t))), 3);
 70 |     float b = pow((1-t),3);
 71 |     return float3(r,g,b);
 72 | }
 73 | 
 74 | float4 drawHeatmapLegend(float2 uv, float2 minUv, float2 maxUv)
 75 | {
 76 |     if (any(uv < minUv) || any(uv > maxUv))
 77 |         return float4(0,0,0,0);
 78 | 
 79 |     float fontSquare = FONT_BLOCK_SIZE / TILE_SIZE;
 80 |     float2 txy = (uv - minUv)/(maxUv - minUv);
 81 |     txy.y = 1.0 - txy.y;
 82 | 
 83 |     float2 quadSizePixels = (maxUv - minUv) * float2(g_dims.xy);
 84 |     float2 fontQuad = quadSizePixels/FONT_BLOCK_SIZE;
 85 |     
 86 |     float4 beginFont  = Font::drawNumber(g_debugFont, g_fontSampler, (txy - float2(0.0 , 0))*fontQuad / float2(2,1), 2, 1);
 87 |     float4 middleFont = Font::drawNumber(g_debugFont, g_fontSampler, (txy - float2(0.5 , 0))*fontQuad / float2(3,1), 3, 500) * float4(0.3,0.3,0.3,1.0);
 88 |     float4 endFont    = Font::drawNumber(g_debugFont, g_fontSampler, (txy - float2(1.0 - (4 * FONT_BLOCK_SIZE/quadSizePixels.x), 0))*fontQuad / float2(4,1), 4, 1000);
 89 |     float4 fontCol = float4(0,0,0,0);
 90 |     fontCol = lerp(fontCol, beginFont,  beginFont.a);
 91 |     fontCol = lerp(fontCol, middleFont, middleFont.a);
 92 |     fontCol = lerp(fontCol, endFont,    endFont.a);
 93 |     float4 bgCol = float4(heatColor(txy.x), 0.9);
 94 |     return lerp(bgCol, fontCol, fontCol.a);
 95 | }
 96 | 
 97 | [numthreads(FINE_TILE_SIZE, FINE_TILE_SIZE, 1)]
 98 | void csMainOverlay(int3 dti : SV_DispatchThreadID, int2 groupID : SV_GroupID)
 99 | {
100 |     float3 finalColor = g_colorBuffer[dti.xy].xyz;
101 |     int2 outputCoord = int2(dti.x, g_dims.y - dti.y - 1);
102 |     float2 uv = geometry::pixelToUV(dti.xy, g_dims.xy);
103 | 
104 |     [branch]
105 |     if ((g_overlayFlags & OVERLAY_FLAGS_SHOW_COARSE_TILES) != 0)
106 |     {
107 |         int tileX = groupID.x >> FINE_TILE_TO_TILE_SHIFT;
108 |         int tileY = groupID.y >> FINE_TILE_TO_TILE_SHIFT;
109 |         int tileId = tileY * g_binTileX + tileX;
110 |         uint count = g_binCounters[tileId];
111 |         float4 tileColor = drawTile(uv * g_dims.xy * 1.0, COARSE_TILE_SIZE, count);
112 |         float4 debugBinCol = count != 0 ? tileColor : float4(0,0,0,0);
113 |         finalColor = lerp(finalColor, debugBinCol.xyz, debugBinCol.a);
114 |     }
115 | 
116 |     [branch]
117 |     if ((g_overlayFlags & OVERLAY_FLAGS_SHOW_FINE_TILES) != 0)
118 |     {
119 |         float4 heatmapColor = drawHeatmapLegend(uv, float2(0.07, 0.1), float2(0.93, 0.15));
120 |         float2 fineTileSize = ceil((float2)g_dims.xy / FINE_TILE_SIZE);
121 |         uint tileCounts = g_fineTileCounters[groupID.y * (int)fineTileSize.x + groupID.x];
122 |         float3 tileColor = heatColor(saturate((float)tileCounts / 300.0));
123 |         finalColor = lerp(finalColor, tileColor, 0.7 * (tileCounts > 0 ? 1.0 : 0.0));
124 |         finalColor = lerp(finalColor, heatmapColor.rgb, heatmapColor.a);
125 |     }
126 | 
127 |     g_output[outputCoord] = float4(finalColor, 1.0);
128 | }
129 | 


--------------------------------------------------------------------------------
/grr/prefix_sum.py:
--------------------------------------------------------------------------------
 1 | import coalpy.gpu as g
 2 | from . import utilities as utils
 3 | 
# Threads per group; must match GROUP_SIZE in prefix_sum_cs.hlsl.
g_group_size = 128
# Per-group scan of one level (inclusive unless EXCLUSIVE_PREFIX is defined).
g_prefix_sum_group = g.Shader(file = "prefix_sum_cs.hlsl", main_function = "csPrefixSumOnGroup")
g_prefix_sum_group_exclusive = g.Shader(file = "prefix_sum_cs.hlsl", main_function = "csPrefixSumOnGroup", defines = ["EXCLUSIVE_PREFIX"])
# Gathers the last element of every group as the input of the next level.
g_prefix_sum_next_input = g.Shader(file = "prefix_sum_cs.hlsl", main_function = "csPrefixSumNextInput")
# Adds the scanned parent level back onto a child level.
g_prefix_sum_resolve_parent = g.Shader(file = "prefix_sum_cs.hlsl", main_function = "csPrefixSumResolveParent")
g_prefix_sum_resolve_parent_exclusive = g.Shader(file = "prefix_sum_cs.hlsl", main_function = "csPrefixSumResolveParent", defines = ["EXCLUSIVE_PREFIX"])
10 | 
def allocate_args(input_counts):
    """Allocate the scratch buffers needed to prefix-sum ``input_counts`` elements.

    Returns a tuple ``(input_scratch, output_scratch, input_counts)``:
    ``input_scratch`` holds ``input_counts`` rounded up to the group size,
    ``output_scratch`` holds the group-aligned sizes of all reduction levels
    added together. The tuple is what ``run`` expects as ``prefix_sum_args``.
    """
    # Walk the reduction levels, each one group_size times smaller than the
    # previous, until a single group is enough. At least one level is counted.
    total_level_elements = 0
    level_count = input_counts
    while True:
        total_level_elements += utils.alignup(level_count, g_group_size)
        level_count = utils.divup(level_count, g_group_size)
        if level_count <= 1:
            break

    input_scratch = g.Buffer(
        name = "reductionBufferInput",
        element_count = utils.alignup(input_counts, g_group_size),
        format = g.Format.R32_UINT)
    output_scratch = g.Buffer(
        name = "reductionBufferOutput",
        element_count = total_level_elements,
        format = g.Format.R32_UINT)
    return (input_scratch, output_scratch, input_counts)
24 | 
def run(cmd_list, input_buffer, prefix_sum_args, is_exclusive = False, input_counts = -1):
    """Record a multi-level GPU prefix sum of ``input_buffer`` into ``cmd_list``.

    ``prefix_sum_args`` is the tuple returned by ``allocate_args``. When
    ``input_counts`` is -1 the count stored in that tuple is used.
    ``is_exclusive`` selects an exclusive instead of an inclusive scan.
    Returns the output scratch buffer; the final result is written at its start.
    """
    reduction_buffer_in, reduction_buffer_out = prefix_sum_args[0], prefix_sum_args[1]
    if input_counts == -1:
        input_counts = prefix_sum_args[2]

    # Downsweep: scan each level per group, then feed the group totals to the
    # next level until a single group covers everything.
    levels = []            # (element count, offset in the output buffer) per level
    remaining = input_counts
    level_offset = 0
    level_index = 0
    keep_reducing = input_counts > 0
    while keep_reducing:
        level_count = remaining
        remaining = utils.divup(remaining, g_group_size)
        levels.append((level_count, level_offset))

        # The exclusive per-group shader is only valid when the whole input
        # fits a single group; otherwise exclusivity is applied while resolving.
        single_group_exclusive = is_exclusive and level_index == 0 and remaining == 1
        cmd_list.dispatch(
            x = remaining, y = 1, z = 1,
            shader = g_prefix_sum_group_exclusive if single_group_exclusive else g_prefix_sum_group,
            inputs = input_buffer if level_index == 0 else reduction_buffer_in,
            outputs = reduction_buffer_out,
            constants = [level_count, 0, level_offset, 0])

        keep_reducing = remaining > 1
        if keep_reducing:
            cmd_list.dispatch(
                x = utils.divup(remaining, g_group_size), y = 1, z = 1,
                shader = g_prefix_sum_next_input,
                inputs = reduction_buffer_out,
                outputs = reduction_buffer_in,
                constants = [0, level_offset, 0, 0])

        level_index += 1
        level_offset += utils.alignup(level_count, g_group_size)

    # Upsweep: from the second to last level down to level 0, add the scanned
    # parent level onto each child level.
    for child in range(len(levels) - 2, -1, -1):
        (parent_count, parent_offset) = levels[child + 1]
        (count, offset) = levels[child]
        const = [0, 0, offset, parent_offset]
        group_count = utils.divup(count, g_group_size)
        if child == 0 and is_exclusive:
            # Level 0 of an exclusive scan also subtracts the original input.
            cmd_list.dispatch(
                x = group_count, y = 1, z = 1,
                shader = g_prefix_sum_resolve_parent_exclusive,
                inputs = input_buffer,
                outputs = reduction_buffer_out,
                constants = const)
        else:
            cmd_list.dispatch(
                x = group_count, y = 1, z = 1,
                shader = g_prefix_sum_resolve_parent,
                outputs = reduction_buffer_out,
                constants = const)
    return reduction_buffer_out
81 | 


--------------------------------------------------------------------------------
/grr/prefix_sum_cs.hlsl:
--------------------------------------------------------------------------------
 1 | 
// This value must match g_group_size in prefix_sum.py
#define GROUP_SIZE 128
// NOTE(review): GroupSize is presumably the name threading.hlsl expects for
// the group size - confirm against that header.
#define GroupSize GROUP_SIZE
#include "threading.hlsl"

// Source values of the current pass.
Buffer<uint> g_inputBuffer : register(t0);
// Destination of the current pass (also read back by csPrefixSumResolveParent).
RWBuffer<uint> g_outputBuffer : register(u0);

// Four integers packed by prefix_sum.py through the `constants` list of each
// dispatch; their meaning is given by the defines below.
cbuffer ConstantsPrefixSum : register(b0)
{
    int4 g_bufferArgs0;
}

#define inputCount g_bufferArgs0.x    // number of valid input elements
#define inputOffset g_bufferArgs0.y   // first element read from g_inputBuffer
#define outputOffset g_bufferArgs0.z  // first element written in g_outputBuffer
#define parentOffset g_bufferArgs0.w  // start of the parent level in g_outputBuffer

groupshared uint gs_prefixCache[GROUP_SIZE];
21 | 
// Scans one level of the prefix sum independently inside every thread group.
// Threads past inputCount contribute 0. The result is inclusive unless
// EXCLUSIVE_PREFIX is defined.
[numthreads(GROUP_SIZE, 1, 1)]
void csPrefixSumOnGroup(int3 dispatchThreadID : SV_DispatchThreadID, int groupIndex : SV_GroupIndex)
{
    const int element = dispatchThreadID.x;

    uint value = 0u;
    if (element < inputCount)
        value = g_inputBuffer[element + inputOffset];

    Threading::Group group;
    group.init((uint)groupIndex);

    uint scanned, groupTotal;
    group.prefixExclusive(value, scanned, groupTotal);
#ifndef EXCLUSIVE_PREFIX
    // Turn the exclusive result into an inclusive one.
    scanned += value;
#endif
    g_outputBuffer[element + outputOffset] = scanned;
}
37 | 
// Builds the input of the next level: element i receives the last value of
// group i of the level stored at inputOffset.
[numthreads(GROUP_SIZE, 1, 1)]
void csPrefixSumNextInput(int3 dispatchThreadID : SV_DispatchThreadID, int3 groupID : SV_GroupID)
{
    const int slot = dispatchThreadID.x;
    const int lastOfGroup = inputOffset + slot * GROUP_SIZE + GROUP_SIZE - 1;
    g_outputBuffer[slot] = g_inputBuffer[lastOfGroup];
}
43 | 
// NOTE(review): currently unused; the parent sum is read straight from the
// output buffer below.
groupshared uint g_parentSum;

// Adds the scanned parent level onto a child level. Group 0 has no preceding
// group and adds 0; group n adds element n - 1 of the parent level.
// With EXCLUSIVE_PREFIX the original input is subtracted as well, which turns
// the inclusive values into exclusive ones; in that variant g_inputBuffer is
// indexed with the same index as the output.
[numthreads(GROUP_SIZE, 1, 1)]
void csPrefixSumResolveParent(int3 dispatchThreadID : SV_DispatchThreadID, int groupIndex : SV_GroupIndex, int3 groupID : SV_GroupID)
{
    //no need to do barriers / etc since groupID will trigger a scalar load. We hope!!
    uint parentSum = groupID.x == 0 ? 0 : g_outputBuffer[parentOffset + groupID.x - 1];
    int index = outputOffset + dispatchThreadID.x;
    // #ifdef (not #if) so this agrees with the #ifndef in csPrefixSumOnGroup and
    // also works when EXCLUSIVE_PREFIX is defined without a value.
#ifdef EXCLUSIVE_PREFIX
    uint val = g_outputBuffer[index] - g_inputBuffer[index];
    g_outputBuffer[index] = val + parentSum;
#else
    g_outputBuffer[index] += parentSum;
#endif
}
62 | 


--------------------------------------------------------------------------------
/grr/profiler.py:
--------------------------------------------------------------------------------
  1 | import coalpy.gpu as g
  2 | import numpy as nm
  3 | import math as m
  4 | 
  5 | class Profiler:
  6 |     def __init__(self):
  7 |         self.m_active = True
  8 |         self.m_gpu_queue = []
  9 |         self.m_marker_data = []
 10 |         self.m_plot_capacity = 200
 11 |         self.m_curr_tick = 0
 12 |         self.m_gpu_plot_data = nm.zeros((self.m_plot_capacity, 2), dtype='f')
 13 | 
 14 |     @property
 15 |     def active(self):
 16 |         return self.m_active
 17 | 
 18 |     @active.setter
 19 |     def active(self, value):
 20 |         self.m_active = value
 21 | 
 22 |     def build_ui(self, imgui : g.ImguiBuilder, implot : g.ImplotBuilder):
 23 |         self.m_active = imgui.begin("Profiler", self.m_active)
 24 |         if self.m_active and imgui.begin_tab_bar("profiler-tab"):
 25 |             if imgui.begin_tab_item("Timeline"):
 26 |                 self._build_timeline_ui(imgui, implot)
 27 |                 imgui.end_tab_item()
 28 |             if imgui.begin_tab_item("Hierarchy"):
 29 |                 self._build_hierarchy_ui(imgui)
 30 |                 imgui.end_tab_item()
 31 |             if imgui.begin_tab_item("Raw Counters"):
 32 |                 self._build_raw_counter_ui(imgui)
 33 |                 imgui.end_tab_item()
 34 |             imgui.end_tab_bar()
 35 |         imgui.end()
 36 | 
 37 |     def _build_raw_counter_ui(self, imgui : g.ImguiBuilder):
 38 |         titles = ["ID", "ParentID", "Name", "Time", "BeginTimestamp", "EndTimestamp"]
 39 |         imgui.text(f"{titles[0] : <4} {titles[1] : <8} {titles[2] : <32} {titles[3] : ^10} {titles[4] : ^18} {titles[5] : ^18} ")
 40 |         for id in range(0, len(self.m_marker_data)):
 41 |             (name, end_timestamp, begin_timestamp, parent_id) = self.m_marker_data[id]
 42 |             time = end_timestamp - begin_timestamp
 43 |             time_str = "%.4f ms" % (time * 1000)
 44 |             imgui.text(f"{id: <4} {parent_id : <8} {name : <32} {time_str : ^10} {begin_timestamp : ^18} {end_timestamp : ^18} ")
 45 | 
 46 |     def _build_hierarchy_ui(self, imgui : g.ImguiBuilder):
 47 |         if len(self.m_marker_data) == 0:
 48 |             return
 49 | 
 50 |         hierarchy = [(id, []) for id in range(0, len(self.m_marker_data))]
 51 |         node_stack = []
 52 |         for id in range(0, len(self.m_marker_data)):
 53 |             (_, _, _, parent_id) = self.m_marker_data[id]
 54 |             if parent_id != -1:
 55 |                 hierarchy[parent_id][1].append(id)
 56 |             else:
 57 |                 node_stack.append((id, False))
 58 | 
 59 |         node_stack.reverse()
 60 |         for (_, l) in hierarchy:
 61 |             l.reverse()
 62 | 
 63 |         while len(node_stack) > 0:
 64 |             (id, was_visited) = node_stack.pop()
 65 |             if was_visited:
 66 |                 imgui.tree_pop()
 67 |             else:
 68 |                 (name, timestamp_end, timestamp_begin, _) = self.m_marker_data[id]
 69 |                 children = hierarchy[id][1]
 70 |                 flags = (g.ImGuiTreeNodeFlags.Leaf|g.ImGuiTreeNodeFlags.Bullet) if len(children) == 0 else 0
 71 |                 timestamp_str = "%.4f ms" % ((timestamp_end - timestamp_begin) * 1000)
 72 |                 if imgui.tree_node_with_id(id, f"{name : <32}{timestamp_str}", flags):
 73 |                     node_stack.append((id, True)) #set was_visited to True
 74 |                     node_stack.extend([(child_id, False) for child_id in children])
 75 | 
 76 |     def _build_timeline_ui(self, imgui : g.ImguiBuilder, implot : g.ImplotBuilder):
 77 |         if implot.begin_plot("Timeline"):
 78 |             implot.setup_axes("Tick", "Time (ms)", 0, g.ImPlotAxisFlags.AutoFit)
 79 |             implot.setup_axis_limits(g.ImAxis.X1, self.m_curr_tick - self.m_plot_capacity, self.m_curr_tick, g.ImPlotCond.Always)
 80 |             implot.plot_shaded("gpu time", self.m_gpu_plot_data, self.m_plot_capacity, -float('inf'),(self.m_curr_tick % self.m_plot_capacity))
 81 |             implot.end_plot()
 82 | 
 83 |     def begin_capture(self):
 84 |         if not self.active:
 85 |             return
 86 | 
 87 |         g.begin_collect_markers()
 88 | 
 89 |     def end_capture(self):        
 90 |         if not self.active:
 91 |             return
 92 | 
 93 |         marker_gpu_data = g.end_collect_markers()
 94 |         request = g.ResourceDownloadRequest(marker_gpu_data.timestamp_buffer)
 95 |         self.m_gpu_queue.append((marker_gpu_data, request))
 96 | 
 97 |         if self.m_gpu_queue[0][1].is_ready():
 98 |             #extract markers
 99 |             (data, req) = self.m_gpu_queue.pop(0)
100 |             gpu_timestamps = nm.frombuffer(req.data_as_bytearray(), dtype=nm.uint64)
101 |             self.m_marker_data = [ (name, gpu_timestamps[ei]/data.timestamp_frequency, gpu_timestamps[bi]/data.timestamp_frequency, pid) for (name, pid, bi, ei) in data.markers]
102 | 
103 |             #process history
104 |             root_tstamps = [(b, e) for (_, e, b, pid) in self.m_marker_data if pid == -1]
105 |             if len(root_tstamps) > 0:
106 |                 begin_timestamp = min([t for (t, _) in root_tstamps])
107 |                 end_timestamp = max([t for (_, t) in root_tstamps])
108 |                 plot_idx = (self.m_curr_tick % self.m_plot_capacity)
109 |                 self.m_gpu_plot_data[plot_idx][0] = self.m_curr_tick
110 |                 self.m_gpu_plot_data[plot_idx][1] = (end_timestamp - begin_timestamp) * 1000
111 |             self.m_curr_tick = self.m_curr_tick + 1
112 |         
113 | 


--------------------------------------------------------------------------------
/grr/raster.py:
--------------------------------------------------------------------------------
  1 | import coalpy.gpu as g
  2 | import numpy as np
  3 | import math
  4 | from . import gpugeo
  5 | from . import utilities
  6 | from . import prefix_sum
  7 | 
  8 | #enums, must match those in raster_cs.hlsl
  9 | class RasterizerFlags:
 10 |     RASTERIZER_FLAGS_OUTPUT_FINE_RASTER_COUNT = 1 << 0
 11 | 
 12 | g_fine_raster_shader = g.Shader(file = "raster_cs.hlsl", name = "raster_fine_tile", main_function = "csMainFineRaster", defines = ["FINE_RASTER"])
 13 | g_bin_triangle_shader = g.Shader(file = "raster_cs.hlsl", name = "raster_bining", main_function = "csMainBinTriangles" )
 14 | g_bin_elements_args_shader = g.Shader(file = "raster_cs.hlsl", name = "raster_elements_args", main_function = "csWriteBinElementArgsBuffer");
 15 | g_bin_elements_shader = g.Shader(file = "raster_cs.hlsl", name = "raster_elements", main_function = "csMainWriteBinElements");
 16 | 
 17 | class Rasterizer:
 18 | 
 19 |     # triangleId (4b), binOffset (4b), binId (4b). See raster_utils.hlsl
 20 |     bin_intersection_record_byte_size = (4 + 4 + 4) 
 21 | 
 22 |     # single uint buffer, with the triangle ID
 23 |     bin_element_size = 4 
 24 |     bin_record_buffer_byte_size = (256 * 1024 * 1024) 
 25 |     bin_record_buffer_element_count = math.ceil(bin_record_buffer_byte_size / (bin_intersection_record_byte_size + bin_element_size))
 26 | 
 27 |     #coarse tile size in pixels
 28 |     coarse_tile_size = (1 << 5)
 29 | 
 30 |     #coarse tile size in pixels
 31 |     fine_tile_size = (1 << 3)
 32 | 
 33 |     def __init__(self, w, h):
 34 |         self.m_max_w = 0
 35 |         self.m_max_h = 0
 36 |         self.m_total_tiles = 0 
 37 |         self.m_bin_offsets_buffer = None
 38 |         self.m_fine_tile_counter_buffer = None
 39 |         self.m_constant_buffer = None
 40 |         self.update_view(w, h)
 41 |         self.allocate_raster_resources()
 42 |         return
 43 | 
 44 |     def get_tile_size(self, w, h):
 45 |         return (math.ceil(w / Rasterizer.coarse_tile_size), math.ceil(h / Rasterizer.coarse_tile_size))
 46 | 
 47 |     def get_fine_tile_size(self, w, h):
 48 |         return (math.ceil(w / Rasterizer.fine_tile_size), math.ceil(h / Rasterizer.fine_tile_size))
 49 | 
 50 |     def rasterize(self, cmd_list, w, h, view_matrix, proj_matrix, geo, view_settings = None):
 51 | 
 52 |         cmd_list.begin_marker("rasterize")
 53 | 
 54 |         utilities.clear_texture(
 55 |             cmd_list, [0.0, 0.0, 0.0, 0.0],
 56 |             self.m_visibility_buffer, w, h)
 57 | 
 58 |         self.update_view(w, h)
 59 | 
 60 |         flags = 0
 61 |         if view_settings != None:
 62 |             flags |= RasterizerFlags.RASTERIZER_FLAGS_OUTPUT_FINE_RASTER_COUNT if view_settings.debug_fine_tiles else 0
 63 |         self.setup_constants(cmd_list, w, h, view_matrix, proj_matrix, int(geo.triCounts), flags)
 64 | 
 65 |         self.bin_tri_records(
 66 |             cmd_list, w, h, 
 67 |             view_matrix,
 68 |             proj_matrix,
 69 |             geo)
 70 | 
 71 |         self.generate_bin_list(
 72 |             cmd_list, w, h)
 73 | 
 74 |         self.dispatch_fine_raster(
 75 |             cmd_list,
 76 |             w, h,
 77 |             view_matrix,
 78 |             proj_matrix,
 79 |             geo)
 80 | 
 81 |         cmd_list.end_marker()
 82 |         
 83 | 
 84 |     def setup_constants(self, cmd_list, w, h, view_matrix, proj_matrix, triangle_counts, flags):
 85 | 
 86 |         cmd_list.begin_marker("setup_constants")
 87 |         tiles_w, tiles_h = self.get_tile_size(w, h)
 88 |         fine_tiles_w, fine_tiles_h = self.get_fine_tile_size(w, h)
 89 | 
 90 |         const= [
 91 |             float(w), float(h), 1.0/w, 1.0/h,
 92 |             int(w), int(h), int(triangle_counts), flags,
 93 |             float(tiles_w), float(tiles_h), float(fine_tiles_w), float(fine_tiles_h),
 94 |         ]
 95 |         const.extend(view_matrix.flatten().tolist())
 96 |         const.extend(proj_matrix.flatten().tolist())
 97 | 
 98 |         if self.m_constant_buffer is None:
 99 |             self.m_constant_buffer = g.Buffer(
100 |                 name = "ConstantBuffer", type=g.BufferType.Standard,
101 |                 format = g.Format.R32_FLOAT, element_count = len(const), usage = g.BufferUsage.Constant)
102 | 
103 |     
104 |         cmd_list.upload_resource( source = const, destination = self.m_constant_buffer)
105 |         cmd_list.end_marker()
106 | 
107 |     def allocate_raster_resources(self):
108 |         self.m_total_records_buffer = g.Buffer(
109 |                 name = "total_bins_buffer",
110 |                 type = g.BufferType.Standard,
111 |                 format = g.Format.R32_UINT,
112 |                 element_count = 1)
113 | 
114 |         self.m_bin_record_buffer = g.Buffer(
115 |             name = "bin_record_buffer",
116 |             type = g.BufferType.Structured,
117 |             element_count = Rasterizer.bin_record_buffer_element_count,
118 |             stride = Rasterizer.bin_intersection_record_byte_size)
119 | 
120 |         self.m_bin_element_buffer = g.Buffer(
121 |             name = "bin_element_buffer",
122 |             type = g.BufferType.Standard,
123 |             format = g.Format.R32_UINT,
124 |             element_count = Rasterizer.bin_record_buffer_element_count)
125 | 
126 |         self.m_bin_elements_args_buffer = g.Buffer(
127 |             name = "bin_elements_arg_buffer",
128 |             type = g.BufferType.Standard,
129 |             format = g.Format.RGBA_32_UINT,
130 |             element_count = 1)
131 | 
132 |     def update_view(self, w, h):
133 |         if w <= self.m_max_w and h <= self.m_max_h:
134 |             return
135 | 
136 |         self.m_visibility_buffer = g.Texture(
137 |             name = "vis_buffer",
138 |             format = g.Format.RGBA_8_UNORM,
139 |             width = w, height = h)
140 |             
141 |         self.m_max_w = w
142 |         self.m_max_h = h
143 | 
144 |         tiles_w, tiles_h = self.get_tile_size(w, h)
145 | 
146 |         self.m_total_tiles = tiles_w * tiles_h
147 |         self.m_prefix_sum_bins_args = prefix_sum.allocate_args(self.m_total_tiles)
148 | 
149 |         self.m_bin_counter_buffer = g.Buffer(
150 |             name = "bin_coarse_tiles_counter",
151 |             type = g.BufferType.Standard,
152 |             format = g.Format.R32_UINT,
153 |             element_count = tiles_w * tiles_h)
154 | 
155 |         fine_tiles_w, fine_tiles_h = self.get_fine_tile_size(w, h)
156 |         self.m_fine_tile_counter_buffer = g.Buffer(
157 |             name = "fine_tile_counter",
158 |             type = g.BufferType.Standard,
159 |             format = g.Format.R32_UINT,
160 |             element_count = fine_tiles_w * fine_tiles_h)
161 | 
162 |     def clear_counter_buffers(self, cmd_list, w, h):
163 |         tiles_w, tiles_h = self.get_tile_size(w, h)
164 |         utilities.clear_uint_buffer(cmd_list, 0, self.m_bin_counter_buffer, 0, tiles_w * tiles_h)
165 |         utilities.clear_uint_buffer(cmd_list, 0, self.m_total_records_buffer, 0, 1)
166 |         return
167 |     
168 |     def bin_tri_records(
169 |         self,
170 |         cmd_list,
171 |         w, h, view_matrix, proj_matrix,
172 |         gpugeo : gpugeo.GpuGeo):
173 | 
174 |         cmd_list.begin_marker("raster_binning")
175 |         self.clear_counter_buffers(cmd_list, w, h)
176 | 
177 |         tiles_w = math.ceil(w / Rasterizer.coarse_tile_size)
178 |         tiles_h = math.ceil(h / Rasterizer.coarse_tile_size)
179 | 
180 |         cmd_list.dispatch(
181 |             shader = g_bin_triangle_shader,
182 |             constants = self.m_constant_buffer,#const,  
183 | 
184 |             inputs = [
185 |                 gpugeo.m_vertex_buffer,
186 |                 gpugeo.m_index_buffer
187 |             ],
188 | 
189 |             outputs = [
190 |                 self.m_total_records_buffer,
191 |                 self.m_bin_counter_buffer,
192 |                 self.m_bin_record_buffer
193 |             ],
194 | 
195 |             x = math.ceil(gpugeo.triCounts / 64),
196 |             y = 1,
197 |             z = 1)
198 |         cmd_list.end_marker()
199 | 
200 |     def generate_bin_list(self, cmd_list, w, h):
201 | 
202 |         tiles_w = math.ceil(w / Rasterizer.coarse_tile_size)
203 |         tiles_h = math.ceil(h / Rasterizer.coarse_tile_size)
204 | 
205 |         cmd_list.begin_marker("generate_bin_list")
206 | 
207 |         cmd_list.dispatch(
208 |             x = 1, y = 1, z = 1,
209 |             shader = g_bin_elements_args_shader,
210 |             inputs = self.m_total_records_buffer,
211 |             outputs = self.m_bin_elements_args_buffer)
212 | 
213 |         self.m_bin_offsets_buffer = prefix_sum.run(cmd_list, self.m_bin_counter_buffer, self.m_prefix_sum_bins_args, is_exclusive = True, input_counts = tiles_w * tiles_h)
214 | 
215 |         cmd_list.dispatch(
216 |             indirect_args = self.m_bin_elements_args_buffer,
217 |             #x = 1, y = 1, z = 1,
218 |             shader = g_bin_elements_shader,
219 |             inputs = [self.m_total_records_buffer, self.m_bin_offsets_buffer, self.m_bin_record_buffer ],
220 |             outputs = self.m_bin_element_buffer)
221 | 
222 |         cmd_list.end_marker()
223 | 
224 |     def dispatch_fine_raster(
225 |         self,
226 |         cmd_list,
227 |         w, h, view_matrix, proj_matrix,
228 |         gpugeo : gpugeo.GpuGeo):
229 | 
230 |         (fine_tiles_x, fine_tiles_y) = self.get_fine_tile_size(w, h)
231 |         cmd_list.begin_marker("fine_raster")
232 |         cmd_list.dispatch(
233 |             shader = g_fine_raster_shader,
234 |             constants = self.m_constant_buffer,#const,
235 |             inputs = [
236 |                 gpugeo.m_vertex_buffer, 
237 |                 gpugeo.m_index_buffer,
238 |                 self.m_bin_counter_buffer,
239 |                 self.m_bin_offsets_buffer,
240 |                 self.m_bin_element_buffer],
241 |             outputs = [
242 |                 self.m_visibility_buffer,
243 |                 self.m_fine_tile_counter_buffer ],
244 |             x = fine_tiles_x,
245 |             y = fine_tiles_y,
246 |             z = 1)
247 |         cmd_list.end_marker()
248 | 
249 |     @property
250 |     def visibility_buffer(self):
251 |         return self.m_visibility_buffer
252 | 


--------------------------------------------------------------------------------
/grr/raster_cs.hlsl:
--------------------------------------------------------------------------------
  1 | #include "geometry.hlsl"
  2 | #include "raster_util.hlsl"
  3 | #include "coverage.hlsl"
  4 | #include "depth_utils.hlsl"
  5 | 
  6 | #define FINE_TILE_THREAD_COUNT (FINE_TILE_SIZE * FINE_TILE_SIZE)
  7 | #define COARSE_TILE_THREAD_COUNT (COARSE_TILE_SIZE * COARSE_TILE_SIZE)
  8 | 
  9 | #define RASTERIZER_FLAGS_OUTPUT_FINE_RASTER_COUNT 1 << 0
 10 | 
 11 | #ifdef FINE_RASTER
 12 | #define GroupSize FINE_TILE_THREAD_COUNT
 13 | #else
 14 | #define GroupSize COARSE_TILE_THREAD_COUNT
 15 | #endif
 16 | 
 17 | #include "threading.hlsl"
 18 | 
 19 | #define ENABLE_Z 1
 20 | #define ENABLE_FINE_COVERAGE_LUT 1
 21 | 
//Shared inputs
// Binding order matches the inputs / outputs lists passed by raster.py
// (bin_tri_records and dispatch_fine_raster).
ByteAddressBuffer g_verts : register(t0);
Buffer<int> g_indices : register(t1);
Buffer<uint> g_rasterBinCounts   : register(t2);
Buffer<uint> g_rasterBinOffsets  : register(t3);
Buffer<uint> g_rasterBinTriIds  : register(t4);
RWTexture2D<float4> g_output  : register(u0);
RWBuffer<uint> g_outputFineTileCount : register(u1);

// Field order must match the list built by Rasterizer.setup_constants in raster.py.
cbuffer Constants : register(b0)
{
    // (w, h, 1/w, 1/h) of the view
    float4 g_outputSize;

    // (w, h) as integers, triangle count of the geometry, RASTERIZER_FLAGS_* bits
    int2   g_outputSizeInts;
    int g_binTriCounts;
    int g_flags;

    // Number of coarse / fine tiles along x and y, stored as floats
    float2 g_coarseTileSize;
    float2 g_fineTileSize;

    float4x4 g_view;
    float4x4 g_proj;
}

// Remaining triangle count and current read offset into g_rasterBinTriIds for the
// coarse tile this fine tile belongs to; advanced by nextTriangleBatch.
groupshared int gs_tileCount;
groupshared int gs_tileOffset;

// Triangles processed per batch: one candidate per thread of the fine tile group.
#define TRIANGLE_CACHE_COUNT FINE_TILE_THREAD_COUNT
groupshared uint gs_furthestZ;
// Compacted batch of surviving triangles and their coverage masks.
groupshared geometry::TriangleH gs_th[TRIANGLE_CACHE_COUNT];
groupshared uint2 gs_coverage[TRIANGLE_CACHE_COUNT];
groupshared geometry::AABB gs_tileBounds;
// Number of valid entries in gs_th / gs_coverage for the current batch.
groupshared uint gs_triangleBatchCount;
// Running total of triangles that survived culling for this fine tile.
groupshared uint gs_writtenFineTileCount;
 56 | 
// Resets the group-shared furthest depth to the bit pattern of MIN_DEPTH.
// MIN_DEPTH is not defined in this file; presumably it comes from depth_utils.hlsl.
void initFurthestDepth()
{
    gs_furthestZ = asuint(MIN_DEPTH);
}
 61 | 
// Culls one batch of up to TRIANGLE_CACHE_COUNT triangles against the fine tile.
// Each thread loads one candidate from the coarse bin list, decides whether it can
// contribute to this tile, and the survivors are compacted into gs_th / gs_coverage.
// Thread 0 publishes the survivor count in gs_triangleBatchCount.
// The caller must barrier before reading those groupshared values.
void fineTileCullTriangleBatch(int groupThreadIndex, int3 groupID, int2 pixelCoverageCoordinate)
{
    // NOTE(review): pixelCoverageUV is computed but never used.
    float2 pixelCoverageUV = (pixelCoverageCoordinate + 0.5) / (float)FINE_TILE_SIZE;

    // Pixel position of this fine tile's origin.
    float2 tileOffset = (groupID.xy * (float)FINE_TILE_SIZE);

    geometry::TriangleH th = (geometry::TriangleH)0;
    uint2 coverageMask = 0;
    uint triValid = 0;
    // Threads past the remaining triangle count keep triValid == 0.
    if (groupThreadIndex < gs_tileCount) 
    {
        int triId = g_rasterBinTriIds[groupThreadIndex + gs_tileOffset];
        geometry::TriangleI ti;
        ti.load(g_indices, triId);
        
        geometry::TriangleV tv;
        tv.load(g_verts, ti);
        th.init(tv, g_view, g_proj);

    #if ENABLE_FINE_COVERAGE_LUT 
        // Depth reject against gs_furthestZ first; IsDepthLess is defined outside this file.
        if (IsDepthLess(asuint(th.aabb().end.z), gs_furthestZ))
        {
            // Vertices expressed in fine-tile units relative to the tile origin.
            float2 v0 = ((((th.p0.xy * 0.5 + 0.5) * (float2)g_outputSizeInts)) - tileOffset) / (float)FINE_TILE_SIZE;
            float2 v1 = ((((th.p1.xy * 0.5 + 0.5) * (float2)g_outputSizeInts)) - tileOffset) / (float)FINE_TILE_SIZE;
            float2 v2 = ((((th.p2.xy * 0.5 + 0.5) * (float2)g_outputSizeInts)) - tileOffset) / (float)FINE_TILE_SIZE;
            // Flip Y before building the coverage mask.
            v0.y = 1.0 - v0.y;
            v1.y = 1.0 - v1.y;
            v2.y = 1.0 - v2.y;
            coverageMask = coverage::triangleCoverageMask(v0, v1, v2, true, false);
            // A triangle with an empty mask covers no pixel of this tile.
            triValid = all(coverageMask == 0) ? 0 : 1;
        }
        else
            triValid = 0;
    #else
        triValid = IsDepthLess(asuint(th.aabb().end.z), gs_furthestZ) && th.aabb().intersects(gs_tileBounds) ? 1 : 0;
    #endif
    } 

    // Compaction: prefixExclusive is declared in threading.hlsl; presumably offset is
    // this thread's slot among the valid threads and count the number of valid
    // threads in the group -- TODO confirm.
    Threading::Group group;
    group.init((uint)groupThreadIndex);
    uint offset, count;
    group.prefixExclusive(triValid, offset, count);
    if (triValid)
    {
        gs_coverage[offset] = coverageMask;
        gs_th[offset] = th;
    }

    if (groupThreadIndex == 0)
    {
        gs_triangleBatchCount = count;
        gs_writtenFineTileCount += count;
    }
}
116 | 
// Advances the group to the next batch of TRIANGLE_CACHE_COUNT triangles and resets
// the furthest depth. Only thread 0 writes the groupshared state.
// The caller must barrier before other threads read it.
void nextTriangleBatch(int groupThreadIndex)
{
    if (groupThreadIndex == 0)
    {
        gs_tileCount -= TRIANGLE_CACHE_COUNT;
        gs_tileOffset += TRIANGLE_CACHE_COUNT;
        initFurthestDepth();
    }
}
126 | 
// Fine raster pass: one thread group per fine tile, one thread per pixel.
// The group walks the triangle list of the coarse tile containing it in batches of
// TRIANGLE_CACHE_COUNT, culls each batch in groupshared memory, then every thread
// tests the surviving triangles against its own pixel and keeps the closest hit.
[numthreads(FINE_TILE_SIZE, FINE_TILE_SIZE, 1)]
void csMainFineRaster(
    int3 dispatchThreadId : SV_DispatchThreadID,
    int3 groupID : SV_GroupID,
    int2 groupThreadID : SV_GroupThreadID,
    int groupThreadIndex : SV_GroupIndex)
{
    // genLUT is defined in coverage.hlsl; presumably it fills the group's coverage
    // lookup tables and needs the barrier below before use -- TODO confirm.
    coverage::genLUT(groupThreadIndex);
    GroupMemoryBarrierWithGroupSync();

    float2 uv = geometry::pixelToUV(dispatchThreadId.xy, g_outputSizeInts);
    float2 hCoords = uv * float2(2.0,2.0) - float2(1.0, 1.0);
    // Coverage masks use a flipped Y relative to the thread layout.
    int2 pixelCoverageCoordinate = int2(groupThreadID.x, FINE_TILE_SIZE - groupThreadID.y - 1); 

    //hack, clear target
    float4 color = float4(0,0,0,0);
    bool writeColor = false;

    // Thread 0 fetches the per-tile state shared by the whole group.
    if (groupThreadIndex == 0)
    {
        // Map the fine tile to the coarse tile (bin) that contains it.
        int tileX = groupID.x >> FINE_TILE_TO_TILE_SHIFT;
        int tileY = groupID.y >> FINE_TILE_TO_TILE_SHIFT;
        int tileId = tileY * g_coarseTileSize.x + tileX;
        // NOTE(review): the bin's triangle count is capped at a hard-coded 10000.
        gs_tileCount = min(g_rasterBinCounts[tileId], 10000);
        gs_tileOffset = g_rasterBinOffsets[tileId];
        gs_tileBounds.begin = float3(geometry::uvToH(geometry::pixelToUV(groupID.xy * FINE_TILE_SIZE, g_outputSize.xy)), 0.0);
        gs_tileBounds.end = float3(geometry::uvToH(geometry::pixelToUV((groupID.xy + int2(1,1)) * FINE_TILE_SIZE, g_outputSize.xy)), 1.0);
        gs_writtenFineTileCount = 0;
        initFurthestDepth();
    }
    
    GroupMemoryBarrierWithGroupSync();

    // Per-pixel depth of the closest hit so far.
    float zBuffer = MAX_DEPTH;
    while (gs_tileCount > 0)
    {
        // Fold every pixel's current depth into gs_furthestZ so the batch cull can
        // depth-reject triangles. InterlockedMaxDepth is defined outside this file.
        uint unusedVal;
        InterlockedMaxDepth(gs_furthestZ, asuint(zBuffer), unusedVal);
        GroupMemoryBarrierWithGroupSync();

        fineTileCullTriangleBatch(groupThreadIndex, groupID, pixelCoverageCoordinate);
        GroupMemoryBarrierWithGroupSync();

        // Every thread walks the compacted batch for its own pixel.
        for (uint triIndex = 0; triIndex < gs_triangleBatchCount; ++triIndex)
        {
            #if ENABLE_FINE_COVERAGE_LUT
            // The 64-bit coverage mask is split over two uints: bits 0-31 in x and
            // 32-63 in y. NOTE(review): the row stride 8 is hard-coded and equals
            // FINE_TILE_SIZE only while FINE_TILE_POW is 3.
            uint2 coverageMask = gs_coverage[triIndex];
            int coverageBit = (pixelCoverageCoordinate.x + pixelCoverageCoordinate.y * 8);
            uint coverageHalf = coverageBit >= 32 ? (coverageMask.y & (1u << (coverageBit - 32))) : (coverageMask.x & (1u << coverageBit));
            if (coverageHalf == 0)
                continue;
            #endif

            geometry::TriangleH th = gs_th[triIndex];
            geometry::TriInterpResult interpResult = th.interp(hCoords);
            // Debug shading: interpolates red / green / blue across the three vertices.
            float3 finalCol = interpResult.eval(float3(1,0,0), float3(0,1,0), float3(0,0,1));
            #if !ENABLE_FINE_COVERAGE_LUT
            if (interpResult.visible)
            #endif
            {
                // Interpolated z divided by interpolated w gives the pixel depth.
                float pZ = interpResult.eval(th.h0.z, th.h1.z, th.h2.z);
                float pW = interpResult.eval(th.h0.w, th.h1.w, th.h2.w);
                pZ *= rcp(pW);
                if (IsDepthLess(pZ, zBuffer))
                {
                    writeColor = true;
                    color.xyz = finalCol;
                    zBuffer = pZ; 
                }
            }
        }

        // Thread 0 advances the shared batch state; the barrier publishes it before
        // the loop condition is evaluated again.
        nextTriangleBatch(groupThreadIndex);
        GroupMemoryBarrierWithGroupSync();
    }

    // Optional debug output: triangles that survived culling for this fine tile.
    if (groupThreadIndex == 0 && (g_flags & RASTERIZER_FLAGS_OUTPUT_FINE_RASTER_COUNT) != 0)
        g_outputFineTileCount[groupID.y * (int)g_fineTileSize.x + groupID.x] = gs_writtenFineTileCount;

    // Pixels that no triangle hit keep whatever the target already holds.
    if (writeColor)
        g_output[dispatchThreadId.xy] = color;
}
209 | 
// Outputs of the binning pass. These reuse u0/u1 with the fine raster outputs;
// each entry point only touches its own set.
// Total number of records written across all bins (single element).
RWBuffer<uint> g_outTotalRecords : register(u0);
// Number of triangles recorded per coarse tile.
RWBuffer<uint> g_binCounters : register(u1);
// Unordered (tile, triangle, offset-in-tile) records, appended atomically.
RWStructuredBuffer<raster::BinIntersectionRecord> g_binOutputRecords : register(u2);

// Binning pass: one thread per triangle. Transforms the triangle, rejects it when it
// is off screen or smaller than a pixel, then appends one record for every coarse
// tile it overlaps.
[numthreads(64, 1, 1)]
void csMainBinTriangles(int3 dti : SV_DispatchThreadID)
{
    if (dti.x >= g_binTriCounts)
        return;

    int triId = dti.x;

    geometry::TriangleI ti;
    ti.load(g_indices, triId);

    geometry::TriangleV tv;
    tv.load(g_verts, ti);

    geometry::TriangleH th;
    th.init(tv, g_view, g_proj);

    //if ((th.clipZMask & ((1 << 3) - 1)) != 0)
    //    return;

    // NOTE(review): the original repeated this exact og0 test three times joined by
    // &&, which is equivalent to testing it once. It was presumably meant to test
    // all three vertices; geometry.hlsl is not visible here, so only the redundancy
    // is removed and the behaviour is unchanged.
    if (all(abs(th.og0.xyz) > th.og0.w))
        return;

    // Reject triangles whose bounds lie entirely outside the [-1, 1] xy range.
    geometry::AABB aabb = th.aabb();
    if (any(aabb.begin.xy > float2(1,1)) || any(aabb.end.xy < float2(-1,-1)))
        return;

    // Reject triangles whose bounds are narrower than (1/w, 1/h) on either axis.
    if (any(aabb.extents().xy < g_outputSize.zw))
        return;

    // Range of coarse tiles touched by the bounds, clamped to the tile grid.
    int2 tilePointA = (geometry::hToUV(aabb.begin.xy) * g_outputSize.xy) / COARSE_TILE_SIZE;
    int2 tilePointB =   (geometry::hToUV(aabb.end.xy) * g_outputSize.xy) / COARSE_TILE_SIZE;

    int2 beginTiles = clamp(min(tilePointA, tilePointB), int2(0,0), int2(g_coarseTileSize) - 1);
    int2 endTiles   = clamp(max(tilePointA, tilePointB), int2(0,0), int2(g_coarseTileSize) - 1);

    //go for each tile in this tri
    for (int tileX = beginTiles.x; tileX <= endTiles.x; ++tileX)
    {
        for (int tileY = beginTiles.y; tileY <= endTiles.y; ++tileY)
        {
            int2 tileB = int2(tileX, tileY);
            int2 tileE = tileB + 1;
            geometry::AABB tile;

            tile.begin = float3(geometry::uvToH(geometry::pixelToUV(tileB * COARSE_TILE_SIZE, g_outputSize.xy)), MAX_DEPTH);
            tile.end = float3(geometry::uvToH(geometry::pixelToUV(tileE * COARSE_TILE_SIZE, g_outputSize.xy)), MIN_DEPTH);

            // Skip tiles the triangle bounds do not overlap in xy. The original put this
            // condition in front of `if (!aabb.intersects(tile)) continue;` without a
            // body, so the skip fired only when both tests agreed. A box that is disjoint
            // in xy cannot overlap the tile, so the xy test alone is the rejection.
            if (any(aabb.begin.xy > tile.end.xy) || any(aabb.end.xy < tile.begin.xy))
                continue;

            if (!geometry::intersectsSAT(th, tile))
                continue;

            //TODO: Optimize this by caching into LDS, and writing then 64 tris per batch
            // NOTE(review): globalOffset is not checked against the capacity of
            // g_binOutputRecords, which is a fixed-size allocation in raster.py.
            int binId = (tileY * g_coarseTileSize.x + tileX);
            uint binOffset = 0, globalOffset = 0;
            InterlockedAdd(g_binCounters[binId], 1, binOffset);
            InterlockedAdd(g_outTotalRecords[0], 1, globalOffset);

            raster::BinIntersectionRecord record;
            record.init(binId, triId, binOffset);
            g_binOutputRecords[globalOffset] = record;
        }
    }
}
281 | 
// Inputs of the bin-element passes; binding order matches the inputs list passed by
// Rasterizer.generate_bin_list in raster.py.
Buffer<uint> g_totalRecords : register(t0);
Buffer<uint> g_binOffsets : register(t1);
StructuredBuffer<raster::BinIntersectionRecord> g_binRecords : register(t2);
// Output of csMainWriteBinElements: triangle ids grouped by bin.
RWBuffer<uint> g_outBinElements : register(u0);

// Output of csWriteBinElementArgsBuffer. Shares u0 with g_outBinElements; the two are
// written by different entry points.
RWBuffer<uint4> g_outArgsBuffer : register(u0);
288 | 
// Writes the indirect dispatch arguments for csMainWriteBinElements: enough groups
// of 64 threads to cover every bin record, with y = z = 1.
[numthreads(1,1,1)]
void csWriteBinElementArgsBuffer()
{
    uint recordCount = g_totalRecords[0];
    // Round up so a partially filled last group is still dispatched.
    uint groupCount = (recordCount + 63) / 64;
    g_outArgsBuffer[0] = uint4(groupCount, 1, 1, 0);
}
294 | 
// Record count loaded once per group by thread 0.
groupshared uint gs_totalRecords;

// Scatter pass: one thread per bin record. Writes the record's triangle id into the
// per-bin list at (bin start offset + offset within the bin).
[numthreads(64,1,1)]
void csMainWriteBinElements(int3 dispatchThreadId : SV_DispatchThreadID, int groupThreadIndex : SV_GroupIndex)
{
    if (groupThreadIndex == 0)
    {
        gs_totalRecords = g_totalRecords[0];
    }

    // Makes gs_totalRecords visible to every thread of the group.
    GroupMemoryBarrierWithGroupSync();

    // The dispatch is rounded up to whole groups of 64; drop the excess threads.
    if (dispatchThreadId.x >= gs_totalRecords)
        return;

    raster::BinIntersectionRecord record = g_binRecords[dispatchThreadId.x];
    int binIndex = record.tileId;
    // g_binOffsets is the buffer returned by prefix_sum.run(..., is_exclusive = True)
    // over the per-tile counters in raster.py.
    int outputIndex = g_binOffsets[binIndex] + record.binOffset;
    g_outBinElements[outputIndex] = record.triangleId;
}
315 | 
316 | 


--------------------------------------------------------------------------------
/grr/raster_util.hlsl:
--------------------------------------------------------------------------------
 1 | #ifndef RASTER_UTIL_H
 2 | #define RASTER_UTIL_H
 3 | 
// Coarse tile edge length in pixels, as a power of two (1 << 5 = 32).
// Must match Rasterizer.coarse_tile_size in raster.py.
#define COARSE_TILE_POW 5
#define COARSE_TILE_SIZE (1 << (COARSE_TILE_POW))

// Fine tile edge length in pixels, as a power of two (1 << 3 = 8).
// Must match Rasterizer.fine_tile_size in raster.py.
#define FINE_TILE_POW 3 
#define FINE_TILE_SIZE (1 << FINE_TILE_POW)

// Right shift that converts a fine tile coordinate into its coarse tile coordinate.
#define FINE_TILE_TO_TILE_SHIFT (COARSE_TILE_POW - FINE_TILE_POW)
11 | 
namespace raster
{
    // One (triangle, tile) pair produced by the binning pass.
    // Three 32-bit ints; the size must match bin_intersection_record_byte_size in raster.py.
    struct BinIntersectionRecord
    {
        int triangleId;
        int binOffset;
        int tileId;

        // Fills every field of the record. Note the argument order: tile, triangle, offset.
        void init(int inTileId, int triId, int inBinOffset)
        {
            tileId = inTileId;
            triangleId = triId;
            binOffset = inBinOffset;
        }

        // Returns the three consecutive index-buffer positions of this triangle.
        int3 getIndices()
        {
            int firstIndex = triangleId * 3;
            return int3(firstIndex, firstIndex + 1, firstIndex + 2);
        }
    };

}
36 | 
37 | #endif
38 | 


--------------------------------------------------------------------------------
/grr/test_bench.py:
--------------------------------------------------------------------------------
 1 | import coalpy.gpu as g
 2 | import numpy as np
 3 | import math
 4 | import functools
 5 | from . import prefix_sum as gpu_prefix_sum
 6 | 
 7 | def prefix_sum(input_data, is_exclusive = False):
 8 |     accum = 0
 9 |     output = []
10 |     for i in range(0, len(input_data), 1):
11 |         if is_exclusive:
12 |             output.append(accum)
13 |             accum += input_data[i]
14 |         else:
15 |             accum += input_data[i]
16 |             output.append(accum)
17 |     return output
18 | 
def test_cluster_gen(is_exclusive = False):
    """Run the GPU prefix sum on 0..buffersz-1 and compare it with the CPU reference.

    Returns True when the first buffersz downloaded values equal
    prefix_sum(input_data, is_exclusive), False otherwise.
    """
    # Not a multiple of the shader group size, so a partially filled group is covered.
    buffersz = 8529
    input_data = np.arange(buffersz, dtype='i')
    test_input_buffer = g.Buffer(format = g.Format.R32_UINT, element_count = buffersz)

    reduction_buffers = gpu_prefix_sum.allocate_args(buffersz)

    cmd_list = g.CommandList()
    cmd_list.upload_resource(source = input_data, destination = test_input_buffer)
    output = gpu_prefix_sum.run(cmd_list, test_input_buffer, reduction_buffers, is_exclusive)

    g.schedule(cmd_list)

    # Blocking readback of the result buffer.
    dr = g.ResourceDownloadRequest(resource = output)
    dr.resolve()

    result = np.frombuffer(dr.data_as_bytearray(), dtype='i')
    # A short readback can never be correct. Fail explicitly instead of padding it
    # by repetition, which is what np.resize would do.
    if result.size < buffersz:
        return False

    expected = prefix_sum(input_data, is_exclusive)
    # The downloaded buffer may hold more than buffersz values; compare the prefix only.
    return bool(np.array_equal(result[:buffersz], expected))
40 | 
def run_test(nm, fn):
    """Call *fn* and print ``<nm> : PASS`` if it returns a truthy value, else ``<nm> : FAIL``."""
    if fn():
        verdict = "PASS"
    else:
        verdict = "FAIL"
    print(nm + " : " + verdict)
44 | 
def test_cluster_gen_inclusive():
    """Run the GPU-vs-CPU prefix sum comparison in inclusive mode."""
    passed = test_cluster_gen(False)
    return passed
47 | 
def test_cluster_gen_exclusive():
    """Run the GPU-vs-CPU prefix sum comparison in exclusive mode."""
    passed = test_cluster_gen(True)
    return passed
50 | 
if __name__ == "__main__":
    # Label / test-function pairs, executed in order when run as a script.
    for label, test_fn in (
        ("test prefix sum inclusive", test_cluster_gen_inclusive),
        ("test prefix sum exclusive", test_cluster_gen_exclusive),
    ):
        run_test(label, test_fn)
54 | 
55 | 


--------------------------------------------------------------------------------
/grr/transform.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import quaternion
  3 | from . import vec
  4 | 
  5 | class Transform:
  6 | 
  7 |     def Identity():
  8 |         return np.identity(4, dtype='f')
  9 | 
 10 |     __tmp_matrix = np.identity(4, dtype='f')
 11 | 
 12 |     s_DirtyScales = 1 << 0
 13 |     s_DirtyTranslations = 1 << 1
 14 |     s_DirtyRotation = 1 << 1
 15 | 
 16 |     def __init__(self):
 17 |         self.m_rotation = vec.q_from_angle_axis(0.0, vec.float3(1, 0, 0))
 18 |         self.m_translation = vec.float3(0, 0, 0)
 19 |         self.m_scale = vec.float3(1, 1, 1)
 20 | 
 21 |         self.m_transform = Transform.Identity
 22 |         self.m_rotation_matrix = Transform.Identity()
 23 |         self.m_translation_matrix = Transform.Identity()
 24 |         self.m_scale_matrix = Transform.Identity()
 25 |         self.m_transform_matrix = Transform.Identity()
 26 |         self.m_transform_inv_matrix = Transform.Identity()
 27 |         self.m_dirty_flags = 0
 28 |         return;
 29 |             
 30 |     @property
 31 |     def rotation(self):
 32 |         return self.m_rotation.copy()
 33 | 
 34 |     @property
 35 |     def translation(self):
 36 |         return self.m_translation.copy()
 37 | 
 38 |     @property
 39 |     def scale(self):
 40 |         return self.m_scale.copy()
 41 | 
 42 |     @property
 43 |     def right(self):
 44 |         self.update_mats()
 45 |         return self.m_rotation_matrix[0:3, 0].copy()
 46 | 
 47 |     @property
 48 |     def up(self):
 49 |         self.update_mats()
 50 |         return self.m_rotation_matrix[0:3, 1].copy()
 51 | 
 52 |     @property
 53 |     def front(self):
 54 |         self.update_mats()
 55 |         return self.m_rotation_matrix[0:3, 2].copy()
 56 | 
 57 |     @rotation.setter
 58 |     def rotation(self, value : np.quaternion):
 59 |         if (type(value) != np.quaternion):
 60 |             raise ValueError("value for propety must be a numpy quaternion") 
 61 |         self.m_dirty_flags = self.m_dirty_flags | Transform.s_DirtyRotation
 62 |         self.m_rotation = value.copy()
 63 | 
 64 |     @translation.setter
 65 |     def translation(self, value):
 66 |         self.m_dirty_flags = self.m_dirty_flags | Transform.s_DirtyTranslations
 67 |         Transform._set_vec_val(self.m_translation, value)
 68 | 
 69 |     @scale.setter
 70 |     def scale(self, value):
 71 |         self.m_dirty_flags = self.m_dirty_flags | Transform.s_DirtyScales
 72 |         Transform._set_vec_val(self.m_scale, value)
 73 | 
 74 |     @property
 75 |     def translation_matrix(self):
 76 |         self.update_mats()
 77 |         return self.m_translation_matrix
 78 | 
 79 |     @property
 80 |     def rotation_matrix(self):
 81 |         self.update_mats()
 82 |         return self.m_rotation_matrix
 83 | 
 84 |     @property
 85 |     def transform_matrix(self):
 86 |         self.update_mats()
 87 |         return self.m_transform_matrix
 88 | 
 89 |     @property
 90 |     def transform_inv_matrix(self):
 91 |         self.update_mats()
 92 |         return self.m_transform_inv_matrix
 93 | 
 94 |     @property
 95 |     def scale_matrix(self):
 96 |         self.update_mats()
 97 |         return self.m_scale_matrix
 98 | 
 99 |     def _set_vec_val(target, value):
100 |         if type(value) == np.ndarray and value.size == 3:
101 |             target[:] = value[:]
102 |         elif type(value) == list and len(value) == 3:
103 |             target[:] = value
104 |         else:
105 |             raise ValueError("value for propety must be a numpy ndarray of size 3, or a flat array")
106 | 
107 |     def update_mats(self):
108 |         if ((self.m_dirty_flags & Transform.s_DirtyTranslations)):
109 |             self.m_translation_matrix[0,3] = self.m_translation[0]
110 |             self.m_translation_matrix[1,3] = self.m_translation[1]
111 |             self.m_translation_matrix[2,3] = self.m_translation[2]
112 | 
113 |         if ((self.m_dirty_flags & Transform.s_DirtyRotation)):
114 |             self.m_rotation_matrix[0:3, 0:3] = quaternion.as_rotation_matrix(self.m_rotation)
115 | 
116 |         if ((self.m_dirty_flags & Transform.s_DirtyScales)):
117 |             self.m_scale_matrix[0, 0] = self.m_scale[0]
118 |             self.m_scale_matrix[1, 1] = self.m_scale[1]
119 |             self.m_scale_matrix[2, 2] = self.m_scale[2]
120 | 
121 |         if (self.m_dirty_flags != 0):
122 |             np.matmul(self.m_rotation_matrix, self.m_scale_matrix, Transform.__tmp_matrix)
123 |             np.matmul(self.m_translation_matrix, Transform.__tmp_matrix, self.m_transform_matrix)
124 |             self.m_transform_inv_matrix = np.linalg.inv(self.m_transform_matrix)
125 | 
126 |         self.m_dirty_flags = 0
127 | 
def projection_matrix(l, r, t, b, n, f, is_ortho=False):
    """Build a 4x4 float32 projection matrix for column vectors.

    The view volume looks down the negative Z axis and depth is mapped to
    the 0..1 range (view-space z = -n maps to 0, z = -f maps to 1).

    Args:
        l, r: Left and right extents of the view volume.
        t, b: Top and bottom extents of the view volume (note the order).
        n, f: Near and far plane distances.
        is_ortho: False (default) builds a perspective matrix, where the
            extents are measured on the near plane. True builds an
            orthographic matrix.

    Returns:
        A new 4x4 numpy array of dtype float32.
    """
    mat = np.identity(4, dtype='f')
    width = r - l
    height = t - b
    depth = f - n
    if is_ortho:
        # Orthographic: x and y are only scaled and re-centred (the offsets
        # live in the last column) and w stays 1, so there is no perspective
        # divide.
        mat[0, 0:4] = [2.0 / width, 0.0, 0.0, -(r + l) / width]
        mat[1, 0:4] = [0.0, 2.0 / height, 0.0, -(t + b) / height]
        # domain goes from 0 to 1 on Z
        mat[2, 0:4] = [0.0, 0.0, -1.0 / depth, -n / depth]
        mat[3, 0:4] = [0.0, 0.0, 0.0, 1.0]
    else:
        # Perspective: w receives -z, so the off-centre terms sit in the z
        # column and are divided out together with x and y.
        mat[0, 0:4] = [(2.0 * n) / width, 0.0, (r + l) / width, 0.0]
        mat[1, 0:4] = [0.0, (2.0 * n) / height, (t + b) / height, 0.0]
        mat[2, 0:4] = [0.0, 0.0, -f / depth, -(n * f) / depth]
        mat[3, 0:4] = [0.0, 0.0, -1.0, 0.0]
    return mat
143 | 
def projection_matrix_from_aspect(fov, aspect, n, f):
    """Build a symmetric perspective matrix from a field of view.

    The half extent ``n * tan(fov / 2)`` is used for left/right, and that
    value times *aspect* is used for top/bottom. So *fov* (in radians, as
    taken by ``np.tan``) spans left to right, and *aspect* scales the
    vertical extent relative to the horizontal one.

    Args:
        fov: Field of view angle in radians.
        aspect: Vertical extent divided by horizontal extent.
        n, f: Near and far plane distances, forwarded to ``projection_matrix``.

    Returns:
        The matrix returned by ``projection_matrix`` for those extents.
    """
    half_width = n * np.tan(0.5 * fov)
    half_height = aspect * half_width
    return projection_matrix(-half_width, half_width, half_height, -half_height, n, f)
148 | 
def to_radians():
    """Return the degrees-to-radians factor; multiply an angle in degrees by it."""
    return np.pi / 180.0
151 | 


--------------------------------------------------------------------------------
/grr/utilities.py:
--------------------------------------------------------------------------------
 1 | import coalpy.gpu as g
 2 | import math
 3 | from . import get_module_path
 4 | 
 5 | g_clear_target_shader = g.Shader(file = "clear_target_cs.hlsl", name = "clear", main_function = "csMainClear" )
 6 | g_clear_uint_buffer_shader = g.Shader(file = "clear_target_cs.hlsl", name = "clear", main_function = "csMainClearUintBuffer" )
 7 | 
 8 | def clear_texture(cmd_list, color, texture, w, h):
 9 |     cmd_list.dispatch(
10 |         shader = g_clear_target_shader,
11 |         constants = color,
12 |         x = math.ceil(w / 8), 
13 |         y = math.ceil(h / 8), 
14 |         z = 1,
15 |         outputs = texture)
16 | 
def clear_uint_buffer(cmd_list, clear_val, buff, el_offset, el_count):
    """Record a dispatch of the uint-buffer clear shader.

    Args:
        cmd_list: Command list the dispatch is recorded into.
        clear_val: Value handed to the shader (converted with ``int``).
        buff: Passed to the shader as its output.
        el_offset: Element offset handed to the shader (converted with ``int``).
        el_count: Element count handed to the shader; also determines the
            number of groups, ``ceil(el_count / 64)`` (64 presumably matches
            the shader's thread-group size -- confirm against
            clear_target_cs.hlsl).
    """
    shader_constants = [int(clear_val), int(el_offset), int(el_count)]
    group_count = math.ceil(el_count / 64)
    cmd_list.dispatch(
        shader = g_clear_uint_buffer_shader,
        constants = shader_constants,
        outputs = buff,
        x = group_count,
        y = 1,
        z = 1)
25 | 
26 | 
def divup(a, b):
    """Return ``a / b`` rounded up, as an int (for non-negative *a*, positive *b*).

    Uses floor division rather than true division, so integer arguments are
    handled exactly even when they are too large to be represented as a float.
    """
    return int((a + b - 1) // b)
29 | 
def alignup(a, b):
    """Round *a* up to the next multiple of *b* (``divup(a, b) * b``)."""
    multiples = divup(a, b)
    return multiples * b
32 | 


--------------------------------------------------------------------------------
/grr/vec.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import quaternion
 3 | 
 4 | def float3(x, y, z):
 5 |     return np.array([x, y, z], dtype='f')
 6 | 
 7 | def float4(x, y, z, w):
 8 |     return np.array([x, y, z, w], dtype='f')
 9 | 
def veclen(v):
    """Return the Euclidean length of the numpy vector *v*."""
    squared_length = np.sum(v * v)
    return np.sqrt(squared_length)
12 | 
def normalize(v):
    """Scale the numpy vector *v* to unit length in place and return it.

    The argument itself is overwritten; the returned object is that same
    array. The values are divided by the Euclidean length without any
    zero-length check.
    """
    length = np.sqrt(np.sum(v ** 2))
    v[:] = v / length
    return v
16 | 
def q_from_angle_axis(angle, axis):
    """Build the quaternion ``(cos(angle), sin(angle) * unit(axis))``.

    Args:
        angle: Angle in radians, as taken by ``np.sin`` / ``np.cos``.
        axis: 3-component axis (numpy array, list or tuple). It does not need
            to be normalized and is not modified.

    Returns:
        An ``np.quaternion`` with scalar part ``cos(angle)`` and vector part
        ``sin(angle)`` times the normalized axis.
    """
    # Work on a float copy so the caller's array is left untouched and
    # integer / list axes normalize correctly.
    axis_values = np.array(axis, dtype=float)
    unit_axis = axis_values / np.sqrt(np.sum(axis_values ** 2))

    # NOTE(review): a unit quaternion for a rotation by theta is conventionally
    # (cos(theta / 2), sin(theta / 2) * axis). The full angle is used here, so
    # `angle` effectively acts as the half-angle. Left unchanged because
    # callers may be tuned to it -- confirm.
    vector_part = np.sin(angle) * unit_axis
    scalar_part = np.cos(angle)
    return np.quaternion(scalar_part, vector_part[0], vector_part[1], vector_part[2])
21 |     
22 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | numpy-quaternion
3 | PyWavefront
4 | 


--------------------------------------------------------------------------------