├── LICENSE.txt
├── README.md
├── docs
    ├── FFX_Denoiser_Reflection_Technology.pdf
    └── FFX_Denoiser_Shadows_Technology.pdf
├── ffx-reflection-dnsr
    ├── ffx_denoiser_reflections_common.h
    ├── ffx_denoiser_reflections_config.h
    ├── ffx_denoiser_reflections_prefilter.h
    ├── ffx_denoiser_reflections_reproject.h
    └── ffx_denoiser_reflections_resolve_temporal.h
└── ffx-shadows-dnsr
    ├── ffx_denoiser_shadows_filter.h
    ├── ffx_denoiser_shadows_prepare.h
    ├── ffx_denoiser_shadows_tileclassification.h
    └── ffx_denoiser_shadows_util.h


/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # FidelityFX Denoiser
 2 | 
 3 | Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
 4 | Permission is hereby granted, free of charge, to any person obtaining a copy
 5 | of this software and associated documentation files (the "Software"), to deal
 6 | in the Software without restriction, including without limitation the rights
 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 8 | copies of the Software, and to permit persons to whom the Software is
 9 | furnished to do so, subject to the following conditions:
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
17 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
18 | THE SOFTWARE.
19 | 
20 | ## Overview
21 | 
22 | FidelityFX Denoiser contains a collection of highly optimized denoiser implementations for specific use cases.
23 | 
24 | ## FidelityFX Shadow Denoiser
25 | 
26 | A spatio-temporal denoiser for raytraced soft shadows. It is intended to be used on a shadow mask that was created from at most one jittered shadow ray per pixel.
27 | It makes use of a tile classification pass to skip work on areas without spatial variance in the shadow mask.
28 | When the temporal sample count is low, the contribution of the spatial filters is increased; it is then gradually reduced as the temporal sample count grows.
29 | The denoiser aims to avoid ghosting artifacts by analyzing the local pixel neighborhood and clamping the accumulated history.
30 | 
31 | ### Links
32 | 
33 | - ffx-shadows-dnsr contains the [Shadow Denoiser](https://github.com/GPUOpen-Effects/FidelityFX-Denoiser/tree/master/ffx-shadows-dnsr)
34 | 
35 | ## FidelityFX Reflection Denoiser
36 | 
37 | The reflection denoiser includes a high performance spatio-temporal denoiser specialized for reflection denoising.
38 | The preferred use case of this denoiser is within applications requiring denoised radiance values generated by some stochastic reflection implementation.
39 | Examples of stochastic reflections:
40 | - Stochastic Screen Space Reflections
41 | - Stochastic Raytraced Reflections
42 | 
43 | ### Links
44 | 
45 | - ffx-reflection-dnsr contains the [Reflection Denoiser](https://github.com/GPUOpen-Effects/FidelityFX-Denoiser/tree/master/ffx-reflection-dnsr)
46 | - Visit [FidelityFX SSSR](https://github.com/GPUOpen-Effects/FidelityFX-SSSR/tree/master/sample) to see the reflection denoiser in action.
47 | 
48 | 


--------------------------------------------------------------------------------
/docs/FFX_Denoiser_Reflection_Technology.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GPUOpen-Effects/FidelityFX-Denoiser/d7dfecbabe7b9523b14e7b067216e06b86e8d189/docs/FFX_Denoiser_Reflection_Technology.pdf


--------------------------------------------------------------------------------
/docs/FFX_Denoiser_Shadows_Technology.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GPUOpen-Effects/FidelityFX-Denoiser/d7dfecbabe7b9523b14e7b067216e06b86e8d189/docs/FFX_Denoiser_Shadows_Technology.pdf


--------------------------------------------------------------------------------
/ffx-reflection-dnsr/ffx_denoiser_reflections_common.h:
--------------------------------------------------------------------------------
  1 | /**********************************************************************
  2 | Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
  3 | 
  4 | Permission is hereby granted, free of charge, to any person obtaining a copy
  5 | of this software and associated documentation files (the "Software"), to deal
  6 | in the Software without restriction, including without limitation the rights
  7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8 | copies of the Software, and to permit persons to whom the Software is
  9 | furnished to do so, subject to the following conditions:
 10 | 
 11 | The above copyright notice and this permission notice shall be included in
 12 | all copies or substantial portions of the Software.
 13 | 
 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 20 | THE SOFTWARE.
 21 | ********************************************************************/
 22 | 
 23 | #ifndef FFX_DNSR_REFLECTIONS_COMMON
 24 | #define FFX_DNSR_REFLECTIONS_COMMON
 25 | 
 26 | #include "ffx_denoiser_reflections_config.h"
 27 | 
 28 | uint FFX_DNSR_Reflections_BitfieldExtract(uint src, uint off, uint bits) {
 29 |     uint mask = (1 << bits) - 1;
 30 |     return (src >> off) & mask;
 31 | }
 32 | 
 33 | uint FFX_DNSR_Reflections_BitfieldInsert(uint src, uint ins, uint bits) {
 34 |     uint mask = (1 << bits) - 1;
 35 |     return (ins & mask) | (src & (~mask));
 36 | }
 37 | 
 38 | //  LANE TO 8x8 MAPPING
 39 | //  ===================
 40 | //  00 01 08 09 10 11 18 19
 41 | //  02 03 0a 0b 12 13 1a 1b
 42 | //  04 05 0c 0d 14 15 1c 1d
 43 | //  06 07 0e 0f 16 17 1e 1f
 44 | //  20 21 28 29 30 31 38 39
 45 | //  22 23 2a 2b 32 33 3a 3b
 46 | //  24 25 2c 2d 34 35 3c 3d
 47 | //  26 27 2e 2f 36 37 3e 3f
 48 | uint2 FFX_DNSR_Reflections_RemapLane8x8(uint lane) {
 49 |     return uint2(FFX_DNSR_Reflections_BitfieldInsert(FFX_DNSR_Reflections_BitfieldExtract(lane, 2u, 3u), lane, 1u),
 50 |                  FFX_DNSR_Reflections_BitfieldInsert(FFX_DNSR_Reflections_BitfieldExtract(lane, 3u, 3u), FFX_DNSR_Reflections_BitfieldExtract(lane, 1u, 2u), 2u));
 51 | }
 52 | 
 53 | min16float FFX_DNSR_Reflections_Luminance(min16float3 color) { return max(dot(color, float3(0.299, 0.587, 0.114)), 0.001); }
 54 | 
 55 | min16float FFX_DNSR_Reflections_ComputeTemporalVariance(min16float3 history_radiance, min16float3 radiance) {
 56 |     min16float history_luminance = FFX_DNSR_Reflections_Luminance(history_radiance);
 57 |     min16float luminance         = FFX_DNSR_Reflections_Luminance(radiance);
 58 |     min16float diff              = abs(history_luminance - luminance) / max(max(history_luminance, luminance), 0.5);
 59 |     return diff * diff;
 60 | }
 61 | 
 62 | uint FFX_DNSR_Reflections_PackFloat16(min16float2 v) {
 63 |     uint2 p = f32tof16(float2(v));
 64 |     return p.x | (p.y << 16);
 65 | }
 66 | 
 67 | min16float2 FFX_DNSR_Reflections_UnpackFloat16(uint a) {
 68 |     float2 tmp = f16tof32(uint2(a & 0xFFFF, a >> 16));
 69 |     return min16float2(tmp);
 70 | }
 71 | 
 72 | uint2 FFX_DNSR_Reflections_PackFloat16_4(min16float4 v) { return uint2(FFX_DNSR_Reflections_PackFloat16(v.xy), FFX_DNSR_Reflections_PackFloat16(v.zw)); }
 73 | 
 74 | min16float4 FFX_DNSR_Reflections_UnpackFloat16_4(uint2 a) { return min16float4(FFX_DNSR_Reflections_UnpackFloat16(a.x), FFX_DNSR_Reflections_UnpackFloat16(a.y)); }
 75 | 
 76 | // Rounds value to the nearest multiple of 8
 77 | uint2 FFX_DNSR_Reflections_RoundUp8(uint2 value) {
 78 |     uint2 round_down = value & ~0b111;
 79 |     return (round_down == value) ? value : value + 8;
 80 | }
 81 | 
 82 | // From "Temporal Reprojection Anti-Aliasing"
 83 | // https://github.com/playdeadgames/temporal
 84 | /**********************************************************************
 85 | Copyright (c) [2015] [Playdead]
 86 | 
 87 | Permission is hereby granted, free of charge, to any person obtaining a copy
 88 | of this software and associated documentation files (the "Software"), to deal
 89 | in the Software without restriction, including without limitation the rights
 90 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 91 | copies of the Software, and to permit persons to whom the Software is
 92 | furnished to do so, subject to the following conditions:
 93 | 
 94 | The above copyright notice and this permission notice shall be included in all
 95 | copies or substantial portions of the Software.
 96 | 
 97 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 98 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 99 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
100 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
101 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
102 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
103 | SOFTWARE.
104 | ********************************************************************/
// Clips prev_sample towards the center of the color box [aabb_min, aabb_max].
// A sample inside the box is returned unchanged; a sample outside is pulled
// back along the line that connects it with the box center. This keeps a
// history sample from drifting far away from the current neighborhood colors.
min16float3 FFX_DNSR_Reflections_ClipAABB(min16float3 aabb_min, min16float3 aabb_max, min16float3 prev_sample) {
    // Box center and half extents; the half extents are padded by 0.001.
    float3 box_center      = 0.5 * (aabb_max + aabb_min);
    float3 box_half_extent = 0.5 * (aabb_max - aabb_min) + 0.001;

    // Offset of the history sample from the box center.
    float3 to_sample = prev_sample - box_center;

    // The same offset measured in half extents, per axis and without sign.
    // The largest component tells how far outside the box the sample lies.
    float3     to_sample_unit = abs(to_sample / box_half_extent);
    min16float largest_axis   = max(max(to_sample_unit.x, to_sample_unit.y), to_sample_unit.z);

    // Inside the box: nothing to clip.
    if (!(largest_axis > 1.0)) {
        return prev_sample;
    }

    // Outside the box: shorten the offset so that its largest component
    // equals the (padded) half extent on that axis.
    return box_center + to_sample / largest_axis;
}
129 | 
#ifdef FFX_DNSR_REFLECTIONS_ESTIMATES_LOCAL_NEIGHBORHOOD

// Radius of the local neighborhood. Defaults to 4 unless it was defined
// before this header is included.
#    ifndef FFX_DNSR_REFLECTIONS_LOCAL_NEIGHBORHOOD_RADIUS
#        define FFX_DNSR_REFLECTIONS_LOCAL_NEIGHBORHOOD_RADIUS 4
#    endif

// Kernel weight for a tap at offset i from the kernel center:
//   exp(-FFX_DNSR_REFLECTIONS_GAUSSIAN_K * i^2 / (RADIUS + 1)^2)
min16float FFX_DNSR_Reflections_LocalNeighborhoodKernelWeight(min16float i) {
    const min16float radius      = FFX_DNSR_REFLECTIONS_LOCAL_NEIGHBORHOOD_RADIUS + 1.0;
    const min16float radius_sq   = radius * radius;
    const min16float distance_sq = i * i;
    return exp(-FFX_DNSR_REFLECTIONS_GAUSSIAN_K * distance_sq / radius_sq);
}

#endif // FFX_DNSR_REFLECTIONS_ESTIMATES_LOCAL_NEIGHBORHOOD
142 | 
143 | #endif // FFX_DNSR_REFLECTIONS_COMMON


--------------------------------------------------------------------------------
/ffx-reflection-dnsr/ffx_denoiser_reflections_config.h:
--------------------------------------------------------------------------------
 1 | /**********************************************************************
 2 | Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
 3 | 
 4 | Permission is hereby granted, free of charge, to any person obtaining a copy
 5 | of this software and associated documentation files (the "Software"), to deal
 6 | in the Software without restriction, including without limitation the rights
 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 8 | copies of the Software, and to permit persons to whom the Software is
 9 | furnished to do so, subject to the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | ********************************************************************/
22 | 
23 | #ifndef FFX_DNSR_REFLECTIONS_CONFIG
24 | #define FFX_DNSR_REFLECTIONS_CONFIG
25 | 
// Tuning constants of the reflection denoiser.

// Falloff constant of FFX_DNSR_Reflections_LocalNeighborhoodKernelWeight.
#define FFX_DNSR_REFLECTIONS_GAUSSIAN_K 3.0
// FFX_DNSR_Reflections_GetRadianceWeight: exp(-(BIAS + variance * VARIANCE_K) * radiance distance).
#define FFX_DNSR_REFLECTIONS_RADIANCE_WEIGHT_BIAS 0.6
#define FFX_DNSR_REFLECTIONS_RADIANCE_WEIGHT_VARIANCE_K 0.1
// FFX_DNSR_Reflections_GetLuminanceWeight: exp(-luminance * WEIGHT).
#define FFX_DNSR_REFLECTIONS_AVG_RADIANCE_LUMINANCE_WEIGHT 0.3
// Prefilter neighbor weight scale: max(VARIANCE_BIAS, 1 - exp(-variance * VARIANCE_WEIGHT)).
#define FFX_DNSR_REFLECTIONS_PREFILTER_VARIANCE_WEIGHT 4.4
#define FFX_DNSR_REFLECTIONS_REPROJECT_SURFACE_DISCARD_VARIANCE_WEIGHT 1.5
#define FFX_DNSR_REFLECTIONS_PREFILTER_VARIANCE_BIAS 0.1
// Exponent applied to the clamped normal dot product in the prefilter edge-stopping weight.
#define FFX_DNSR_REFLECTIONS_PREFILTER_NORMAL_SIGMA 512.0
// Scale of the depth difference in the prefilter edge-stopping weight.
#define FFX_DNSR_REFLECTIONS_PREFILTER_DEPTH_SIGMA 4.0
// Scales of the normal and depth terms in FFX_DNSR_Reflections_GetDisocclusionFactor.
#define FFX_DNSR_REFLECTIONS_DISOCCLUSION_NORMAL_WEIGHT 1.4
#define FFX_DNSR_REFLECTIONS_DISOCCLUSION_DEPTH_WEIGHT 1.0
#define FFX_DNSR_REFLECTIONS_DISOCCLUSION_THRESHOLD 0.9
#define FFX_DNSR_REFLECTIONS_REPROJECTION_NORMAL_SIMILARITY_THRESHOLD 0.9999
// Evaluates 1 - exp(-r * 100). The argument is parenthesized so that compound
// expressions such as `a + b` are negated and scaled as a whole.
#define FFX_DNSR_REFLECTIONS_SAMPLES_FOR_ROUGHNESS(r) (1.0 - exp(-(r) * 100.0))
40 | 
41 | #endif // FFX_DNSR_REFLECTIONS_CONFIG


--------------------------------------------------------------------------------
/ffx-reflection-dnsr/ffx_denoiser_reflections_prefilter.h:
--------------------------------------------------------------------------------
  1 | 
  2 | /**********************************************************************
  3 | Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
  4 | 
  5 | Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | of this software and associated documentation files (the "Software"), to deal
  7 | in the Software without restriction, including without limitation the rights
  8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | copies of the Software, and to permit persons to whom the Software is
 10 | furnished to do so, subject to the following conditions:
 11 | 
 12 | The above copyright notice and this permission notice shall be included in
 13 | all copies or substantial portions of the Software.
 14 | 
 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 21 | THE SOFTWARE.
 22 | ********************************************************************/
 23 | 
 24 | #ifndef FFX_DNSR_REFLECTIONS_PREFILTER
 25 | #define FFX_DNSR_REFLECTIONS_PREFILTER
 26 | 
 27 | #include "ffx_denoiser_reflections_common.h"
 28 | 
 29 | groupshared uint  g_ffx_dnsr_shared_0[16][16];
 30 | groupshared uint  g_ffx_dnsr_shared_1[16][16];
 31 | groupshared uint  g_ffx_dnsr_shared_2[16][16];
 32 | groupshared uint  g_ffx_dnsr_shared_3[16][16];
 33 | groupshared float g_ffx_dnsr_shared_depth[16][16];
 34 | 
 35 | struct FFX_DNSR_Reflections_NeighborhoodSample {
 36 |     min16float3 radiance;
 37 |     min16float  variance;
 38 |     min16float3 normal;
 39 |     float       depth;
 40 | };
 41 | 
 42 | FFX_DNSR_Reflections_NeighborhoodSample FFX_DNSR_Reflections_LoadFromGroupSharedMemory(int2 idx) {
 43 |     uint2       packed_radiance          = uint2(g_ffx_dnsr_shared_0[idx.y][idx.x], g_ffx_dnsr_shared_1[idx.y][idx.x]);
 44 |     min16float4 unpacked_radiance        = FFX_DNSR_Reflections_UnpackFloat16_4(packed_radiance);
 45 |     uint2       packed_normal_variance   = uint2(g_ffx_dnsr_shared_2[idx.y][idx.x], g_ffx_dnsr_shared_3[idx.y][idx.x]);
 46 |     min16float4 unpacked_normal_variance = FFX_DNSR_Reflections_UnpackFloat16_4(packed_normal_variance);
 47 | 
 48 |     FFX_DNSR_Reflections_NeighborhoodSample sample;
 49 |     sample.radiance = unpacked_radiance.xyz;
 50 |     sample.normal   = unpacked_normal_variance.xyz;
 51 |     sample.variance = unpacked_normal_variance.w;
 52 |     sample.depth    = g_ffx_dnsr_shared_depth[idx.y][idx.x];
 53 |     return sample;
 54 | }
 55 | 
 56 | void FFX_DNSR_Reflections_StoreInGroupSharedMemory(int2 group_thread_id, min16float3 radiance, min16float variance, min16float3 normal, float depth) {
 57 |     g_ffx_dnsr_shared_0[group_thread_id.y][group_thread_id.x]     = FFX_DNSR_Reflections_PackFloat16(radiance.xy);
 58 |     g_ffx_dnsr_shared_1[group_thread_id.y][group_thread_id.x]     = FFX_DNSR_Reflections_PackFloat16(radiance.zz);
 59 |     g_ffx_dnsr_shared_2[group_thread_id.y][group_thread_id.x]     = FFX_DNSR_Reflections_PackFloat16(normal.xy);
 60 |     g_ffx_dnsr_shared_3[group_thread_id.y][group_thread_id.x]     = FFX_DNSR_Reflections_PackFloat16(min16float2(normal.z, variance));
 61 |     g_ffx_dnsr_shared_depth[group_thread_id.y][group_thread_id.x] = depth;
 62 | }
 63 | 
 64 | void FFX_DNSR_Reflections_InitializeGroupSharedMemory(int2 dispatch_thread_id, int2 group_thread_id, int2 screen_size) {
 65 |     // Load 16x16 region into shared memory using 4 8x8 blocks.
 66 |     int2 offset[4] = {int2(0, 0), int2(8, 0), int2(0, 8), int2(8, 8)};
 67 | 
 68 |     // Intermediate storage registers to cache the result of all loads
 69 |     min16float3 radiance[4];
 70 |     min16float  variance[4];
 71 |     min16float3 normal[4];
 72 |     float       depth[4];
 73 | 
 74 |     // Start in the upper left corner of the 16x16 region.
 75 |     dispatch_thread_id -= 4;
 76 | 
 77 |     // First store all loads in registers
 78 |     for (int i = 0; i < 4; ++i) {
 79 |         FFX_DNSR_Reflections_LoadNeighborhood(dispatch_thread_id + offset[i], radiance[i], variance[i], normal[i], depth[i], screen_size);
 80 |     }
 81 | 
 82 |     // Then move all registers to groupshared memory
 83 |     for (int j = 0; j < 4; ++j) {
 84 |         FFX_DNSR_Reflections_StoreInGroupSharedMemory(group_thread_id + offset[j], radiance[j], variance[j], normal[j], depth[j]); // X
 85 |     }
 86 | }
 87 | 
 88 | min16float FFX_DNSR_Reflections_GetEdgeStoppingNormalWeight(min16float3 normal_p, min16float3 normal_q) {
 89 |     return pow(max(dot(normal_p, normal_q), 0.0), FFX_DNSR_REFLECTIONS_PREFILTER_NORMAL_SIGMA);
 90 | }
 91 | 
 92 | min16float FFX_DNSR_Reflections_GetEdgeStoppingDepthWeight(float center_depth, float neighbor_depth) {
 93 |     return exp(-abs(center_depth - neighbor_depth) * center_depth * FFX_DNSR_REFLECTIONS_PREFILTER_DEPTH_SIGMA);
 94 | }
 95 | 
 96 | min16float FFX_DNSR_Reflections_GetRadianceWeight(min16float3 center_radiance, min16float3 neighbor_radiance, min16float variance) {
 97 |     return max(exp(-(FFX_DNSR_REFLECTIONS_RADIANCE_WEIGHT_BIAS + variance * FFX_DNSR_REFLECTIONS_RADIANCE_WEIGHT_VARIANCE_K)
 98 |                     * length(center_radiance - neighbor_radiance.xyz))
 99 |             , 1.0e-2);
100 | }
101 | 
// Filters the center sample with 15 neighbors read from groupshared memory.
//   group_thread_id   - position of the center sample in groupshared memory
//   avg_radiance      - reference radiance that every sample's radiance weight is measured against
//   center            - the center sample
//   resolved_radiance - out: weighted average of the radiance
//   resolved_variance - out: variance combined with squared weights
void FFX_DNSR_Reflections_Resolve(int2 group_thread_id, min16float3 avg_radiance, FFX_DNSR_Reflections_NeighborhoodSample center,
                                  out min16float3 resolved_radiance, out min16float resolved_variance) {
    // First 15 numbers of Halton(2,3) stretched to [-3,3]. The center is not
    // part of the list because it seeds the sums below.
    const uint tap_count       = 15;
    const int2 tap_offsets[15] = {int2(0, 1),  int2(-2, 1),  int2(2, -3), int2(-3, 0),  int2(1, 2), int2(-1, -2), int2(3, 0), int2(-3, 3),
                                  int2(0, -3), int2(-1, -1), int2(2, 1),  int2(-2, -2), int2(1, 0), int2(0, 2),   int2(3, -1)};

    // The center is weighted against the average radiance as well. This is
    // important to remove fireflies: it removes quite a bit of energy but
    // makes everything much more stable.
    min16float  weight_sum   = FFX_DNSR_Reflections_GetRadianceWeight(avg_radiance, center.radiance.xyz, center.variance);
    min16float3 radiance_sum = center.radiance.xyz * weight_sum;
    min16float  variance_sum = center.variance * weight_sum * weight_sum;

    // Scale shared by all neighbors; it grows with the variance of the center
    // and is never smaller than FFX_DNSR_REFLECTIONS_PREFILTER_VARIANCE_BIAS.
    min16float neighbor_scale = max(FFX_DNSR_REFLECTIONS_PREFILTER_VARIANCE_BIAS,
                                    1.0 - exp(-(center.variance * FFX_DNSR_REFLECTIONS_PREFILTER_VARIANCE_WEIGHT)));

    for (uint tap = 0; tap < tap_count; ++tap) {
        int2                                    tap_idx  = group_thread_id + tap_offsets[tap];
        FFX_DNSR_Reflections_NeighborhoodSample neighbor = FFX_DNSR_Reflections_LoadFromGroupSharedMemory(tap_idx);

        // Product of the normal, depth and radiance weights and the shared scale.
        min16float tap_weight = FFX_DNSR_Reflections_GetEdgeStoppingNormalWeight(float3(center.normal), float3(neighbor.normal));
        tap_weight *= FFX_DNSR_Reflections_GetEdgeStoppingDepthWeight(center.depth, neighbor.depth);
        tap_weight *= FFX_DNSR_Reflections_GetRadianceWeight(avg_radiance, neighbor.radiance.xyz, center.variance);
        tap_weight *= neighbor_scale;

        weight_sum += tap_weight;
        radiance_sum += tap_weight * neighbor.radiance.xyz;
        variance_sum += tap_weight * tap_weight * neighbor.variance;
    }

    // Normalize: radiance by the weight sum, variance by its square.
    resolved_radiance = radiance_sum / weight_sum;
    resolved_variance = variance_sum / (weight_sum * weight_sum);
}
137 | 
// Entry point of the prefilter pass for one pixel. Loads the neighborhood
// into groupshared memory, filters the pixel if it needs denoising and stores
// the result; otherwise the center sample is stored unchanged.
void FFX_DNSR_Reflections_Prefilter(int2 dispatch_thread_id, int2 group_thread_id, uint2 screen_size) {
    min16float roughness = FFX_DNSR_Reflections_LoadRoughness(dispatch_thread_id);

    // All threads of the group fill the shared region before anyone reads it.
    FFX_DNSR_Reflections_InitializeGroupSharedMemory(dispatch_thread_id, group_thread_id, screen_size);
    GroupMemoryBarrierWithGroupSync();

    // The group's own pixels sit 4 entries into the 16x16 shared region.
    int2 center_idx = group_thread_id + 4;

    FFX_DNSR_Reflections_NeighborhoodSample center = FFX_DNSR_Reflections_LoadFromGroupSharedMemory(center_idx);

    // Default to a plain copy of the center sample.
    min16float3 out_radiance = center.radiance;
    min16float  out_variance = center.variance;

    // Only glossy, non-mirror pixels with non-zero variance are filtered.
    if (center.variance > 0.0 && FFX_DNSR_Reflections_IsGlossyReflection(center_roughness_guard(roughness)) && !FFX_DNSR_Reflections_IsMirrorReflection(roughness)) {
        float2      uv8          = (float2(dispatch_thread_id.xy) + (0.5).xx) / FFX_DNSR_Reflections_RoundUp8(screen_size);
        min16float3 avg_radiance = FFX_DNSR_Reflections_SampleAverageRadiance(uv8);
        FFX_DNSR_Reflections_Resolve(center_idx, avg_radiance, center, out_radiance, out_variance);
    }

    FFX_DNSR_Reflections_StorePrefilteredReflections(dispatch_thread_id, out_radiance, out_variance);
}
160 | 
161 | #endif // FFX_DNSR_REFLECTIONS_PREFILTER


--------------------------------------------------------------------------------
/ffx-reflection-dnsr/ffx_denoiser_reflections_reproject.h:
--------------------------------------------------------------------------------
  1 | /**********************************************************************
  2 | Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
  3 | 
  4 | Permission is hereby granted, free of charge, to any person obtaining a copy
  5 | of this software and associated documentation files (the "Software"), to deal
  6 | in the Software without restriction, including without limitation the rights
  7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8 | copies of the Software, and to permit persons to whom the Software is
  9 | furnished to do so, subject to the following conditions:
 10 | 
 11 | The above copyright notice and this permission notice shall be included in
 12 | all copies or substantial portions of the Software.
 13 | 
 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 20 | THE SOFTWARE.
 21 | ********************************************************************/
 22 | 
 23 | #ifndef FFX_DNSR_REFLECTIONS_REPROJECT
 24 | #define FFX_DNSR_REFLECTIONS_REPROJECT
 25 | 
 26 | #define FFX_DNSR_REFLECTIONS_ESTIMATES_LOCAL_NEIGHBORHOOD
 27 | #include "ffx_denoiser_reflections_common.h"
 28 | 
 29 | groupshared uint  g_ffx_dnsr_shared_0[16][16];
 30 | groupshared uint  g_ffx_dnsr_shared_1[16][16];
 31 | 
 32 | struct FFX_DNSR_Reflections_NeighborhoodSample {
 33 |     min16float3 radiance;
 34 | };
 35 | 
 36 | FFX_DNSR_Reflections_NeighborhoodSample FFX_DNSR_Reflections_LoadFromGroupSharedMemory(int2 idx) {
 37 |     uint2       packed_radiance          = uint2(g_ffx_dnsr_shared_0[idx.y][idx.x], g_ffx_dnsr_shared_1[idx.y][idx.x]);
 38 |     min16float4 unpacked_radiance        = FFX_DNSR_Reflections_UnpackFloat16_4(packed_radiance);
 39 | 
 40 |     FFX_DNSR_Reflections_NeighborhoodSample sample;
 41 |     sample.radiance = unpacked_radiance.xyz;
 42 |     return sample;
 43 | }
 44 | 
 45 | void FFX_DNSR_Reflections_StoreInGroupSharedMemory(int2 group_thread_id, min16float3 radiance) {
 46 |     g_ffx_dnsr_shared_0[group_thread_id.y][group_thread_id.x]     = FFX_DNSR_Reflections_PackFloat16(radiance.xy);
 47 |     g_ffx_dnsr_shared_1[group_thread_id.y][group_thread_id.x]     = FFX_DNSR_Reflections_PackFloat16(radiance.zz);
 48 | }
 49 | 
 50 | void FFX_DNSR_Reflections_StoreInGroupSharedMemory(int2 group_thread_id, min16float4 radiance_variance) {
 51 |     g_ffx_dnsr_shared_0[group_thread_id.y][group_thread_id.x]     = FFX_DNSR_Reflections_PackFloat16(radiance_variance.xy);
 52 |     g_ffx_dnsr_shared_1[group_thread_id.y][group_thread_id.x]     = FFX_DNSR_Reflections_PackFloat16(radiance_variance.zw);
 53 | }
 54 | 
 55 | void FFX_DNSR_Reflections_InitializeGroupSharedMemory(int2 dispatch_thread_id, int2 group_thread_id, int2 screen_size) {
 56 |     // Load 16x16 region into shared memory using 4 8x8 blocks.
 57 |     int2 offset[4] = {int2(0, 0), int2(8, 0), int2(0, 8), int2(8, 8)};
 58 | 
 59 |     // Intermediate storage registers to cache the result of all loads
 60 |     min16float3 radiance[4];
 61 | 
 62 |     // Start in the upper left corner of the 16x16 region.
 63 |     dispatch_thread_id -= 4;
 64 | 
 65 |     // First store all loads in registers
 66 |     for (int i = 0; i < 4; ++i) {
 67 |         radiance[i] = FFX_DNSR_Reflections_LoadRadiance(dispatch_thread_id + offset[i]);
 68 |     }
 69 | 
 70 |     // Then move all registers to groupshared memory
 71 |     for (int j = 0; j < 4; ++j) {
 72 |         FFX_DNSR_Reflections_StoreInGroupSharedMemory(group_thread_id + offset[j], radiance[j]); // X
 73 |     }
 74 | }
 75 | 
 76 | min16float4 FFX_DNSR_Reflections_LoadFromGroupSharedMemoryRaw(int2 idx) {
 77 |     uint2 packed_radiance = uint2(g_ffx_dnsr_shared_0[idx.y][idx.x], g_ffx_dnsr_shared_1[idx.y][idx.x]);
 78 |     return FFX_DNSR_Reflections_UnpackFloat16_4(packed_radiance);
 79 | }
 80 | 
 81 | min16float FFX_DNSR_Reflections_GetLuminanceWeight(min16float3 val) {
 82 |     min16float luma   = FFX_DNSR_Reflections_Luminance(val.xyz);
 83 |     min16float weight = max(exp(-luma * FFX_DNSR_REFLECTIONS_AVG_RADIANCE_LUMINANCE_WEIGHT), 1.0e-2);
 84 |     return weight;
 85 | }
 86 | 
 87 | float2 FFX_DNSR_Reflections_GetSurfaceReprojection(int2 dispatch_thread_id, float2 uv, float2 motion_vector) {
 88 |     // Reflector position reprojection
 89 |     float2 history_uv = uv - motion_vector;
 90 |     return history_uv;
 91 | }
 92 | 
 93 | float2 FFX_DNSR_Reflections_GetHitPositionReprojection(int2 dispatch_thread_id, float2 uv, float reflected_ray_length) {
 94 |     // View-space position of the reflecting surface, rebuilt from its uv and loaded depth.
 95 |     float3 surface_view_pos = FFX_DNSR_Reflections_ScreenSpaceToViewSpace(float3(uv, FFX_DNSR_Reflections_LoadDepth(dispatch_thread_id)));
 96 | 
 97 |     // Total path length in view space: camera -> surface, plus surface -> hit position.
 98 |     float camera_to_surface = length(surface_view_pos);
 99 |     float total_length      = camera_to_surface + reflected_ray_length;
100 | 
101 |     // Parallax correction: extend the camera ray "straight through" the
102 |     // reflecting surface until it has the total length. The tip is the
103 |     // "fake" hit position that gets reprojected to the previous frame.
104 |     float3 ray_direction = surface_view_pos / camera_to_surface; // == normalize(surface_view_pos)
105 |     float3 fake_hit_view = ray_direction * total_length;
106 | 
107 |     float3 fake_hit_world = FFX_DNSR_Reflections_ViewSpaceToWorldSpace(float4(fake_hit_view, 1));
108 |     float3 fake_hit_prev  = FFX_DNSR_Reflections_WorldSpaceToScreenSpacePrevious(fake_hit_world);
109 | 
110 |     // Only the xy components are returned as the history uv.
111 |     return fake_hit_prev.xy;
112 | }
113 | 
114 | min16float FFX_DNSR_Reflections_GetDisocclusionFactor(min16float3 normal, min16float3 history_normal, float linear_depth, float history_linear_depth) {
115 |     // Product of two exponential falloffs: normal mismatch (1 - clamped cosine) and depth difference relative to linear_depth.
116 |     min16float similarity = exp(-abs(1.0 - max(0.0, dot(normal, history_normal))) * FFX_DNSR_REFLECTIONS_DISOCCLUSION_NORMAL_WEIGHT) //
117 |                             * exp(-abs(history_linear_depth - linear_depth) / linear_depth * FFX_DNSR_REFLECTIONS_DISOCCLUSION_DEPTH_WEIGHT);
118 |     return similarity;
119 | }
120 | 
121 | struct FFX_DNSR_Reflections_Moments { // Per-channel statistics returned by FFX_DNSR_Reflections_EstimateLocalNeighborhoodInGroup.
122 |     min16float3 mean;     // Kernel-weighted mean of the neighborhood radiance.
123 |     min16float3 variance; // abs(weighted mean of squares - mean * mean), per channel.
124 | };
125 | 
126 | FFX_DNSR_Reflections_Moments FFX_DNSR_Reflections_EstimateLocalNeighborhoodInGroup(int2 group_thread_id) {
127 |     // Kernel-weighted per-channel mean and variance of the radiance cached in groupshared memory around group_thread_id.
128 |     FFX_DNSR_Reflections_Moments moments;
129 |     moments.mean          = 0;
130 |     moments.variance      = 0; // Accumulates the weighted sum of squares until the final subtraction.
131 |     min16float weight_sum = 0;
132 |     for (int dy = -FFX_DNSR_REFLECTIONS_LOCAL_NEIGHBORHOOD_RADIUS; dy <= FFX_DNSR_REFLECTIONS_LOCAL_NEIGHBORHOOD_RADIUS; ++dy) {
133 |         for (int dx = -FFX_DNSR_REFLECTIONS_LOCAL_NEIGHBORHOOD_RADIUS; dx <= FFX_DNSR_REFLECTIONS_LOCAL_NEIGHBORHOOD_RADIUS; ++dx) {
134 |             min16float  tap_weight = FFX_DNSR_Reflections_LocalNeighborhoodKernelWeight(dx) * FFX_DNSR_Reflections_LocalNeighborhoodKernelWeight(dy);
135 |             min16float3 tap        = FFX_DNSR_Reflections_LoadFromGroupSharedMemory(group_thread_id + int2(dx, dy)).radiance;
136 |             weight_sum            += tap_weight;
137 |             moments.mean          += tap * tap_weight;
138 |             moments.variance      += tap * tap * tap_weight;
139 |         }
140 |     }
141 |     moments.mean     /= weight_sum;
142 |     moments.variance /= weight_sum;
143 | 
144 |     moments.variance = abs(moments.variance - moments.mean * moments.mean); // |E[x^2] - E[x]^2|
145 |     return moments;
146 | }
147 | 
148 | float dot2(float3 a) { return dot(a, a); } // Dot product of a with itself, i.e. its squared length.
149 | 
150 | void FFX_DNSR_Reflections_PickReprojection(int2            dispatch_thread_id,  // in:  pixel being reprojected
151 |                                            int2            group_thread_id,     // in:  groupshared index used for the local neighborhood estimate
152 |                                            uint2           screen_size,         // in:  used to convert between pixel coordinates and uv
153 |                                            min16float      roughness,           // in:  roughness of the current pixel
154 |                                            min16float      ray_length,          // in:  reflected ray length fed to the hit-position reprojection
155 |                                            out min16float  disocclusion_factor, // out: 0.0 when the history is rejected, otherwise the match score
156 |                                            out float2      reprojection_uv,     // out: history uv that was picked
157 |                                            out min16float3 reprojection) {      // out: history radiance for that uv
158 |     // Chooses between hit-point and surface reprojection, then scores the chosen history sample against the current pixel.
159 |     reprojection_uv = 0.0; // Give these out parameters defined values so the early return below cannot leave them unset.
160 |     reprojection    = 0.0;
161 |     FFX_DNSR_Reflections_Moments local_neighborhood = FFX_DNSR_Reflections_EstimateLocalNeighborhoodInGroup(group_thread_id);
162 |     float2      uv     = float2(dispatch_thread_id.x + 0.5, dispatch_thread_id.y + 0.5) / screen_size;
163 |     min16float3 normal = FFX_DNSR_Reflections_LoadWorldSpaceNormal(dispatch_thread_id);
164 |     min16float3 history_normal;
165 |     float       history_linear_depth;
166 |     {
167 |         const float2      motion_vector             = FFX_DNSR_Reflections_LoadMotionVector(dispatch_thread_id);
168 |         const float2      surface_reprojection_uv   = FFX_DNSR_Reflections_GetSurfaceReprojection(dispatch_thread_id, uv, motion_vector);
169 |         const float2      hit_reprojection_uv       = FFX_DNSR_Reflections_GetHitPositionReprojection(dispatch_thread_id, uv, ray_length);
170 |         const min16float3 surface_normal            = FFX_DNSR_Reflections_SampleWorldSpaceNormalHistory(surface_reprojection_uv);
171 |         const min16float3 hit_normal                = FFX_DNSR_Reflections_SampleWorldSpaceNormalHistory(hit_reprojection_uv);
172 |         const min16float3 surface_history           = FFX_DNSR_Reflections_SampleRadianceHistory(surface_reprojection_uv);
173 |         const min16float3 hit_history               = FFX_DNSR_Reflections_SampleRadianceHistory(hit_reprojection_uv);
174 |         const float       hit_normal_similarity     = dot(normalize((float3)hit_normal), normalize((float3)normal));
175 |         const float       surface_normal_similarity = dot(normalize((float3)surface_normal), normalize((float3)normal));
176 |         const min16float  hit_roughness             = FFX_DNSR_Reflections_SampleRoughnessHistory(hit_reprojection_uv);
177 |         const min16float  surface_roughness         = FFX_DNSR_Reflections_SampleRoughnessHistory(surface_reprojection_uv);
178 | 
179 |         // Prefer the hit-point reprojection when its history normal and roughness match at least as well as the surface reprojection's.
180 |         if (hit_normal_similarity > FFX_DNSR_REFLECTIONS_REPROJECTION_NORMAL_SIMILARITY_THRESHOLD  // Candidate for mirror reflection parallax
181 |             && hit_normal_similarity + 1.0e-3 > surface_normal_similarity                          //
182 |             && abs(hit_roughness - roughness) < abs(surface_roughness - roughness) + 1.0e-3        //
183 |         ) {
184 |             history_normal                 = hit_normal;
185 |             float hit_history_depth        = FFX_DNSR_Reflections_SampleDepthHistory(hit_reprojection_uv);
186 |             float hit_history_linear_depth = FFX_DNSR_Reflections_GetLinearDepth(hit_reprojection_uv, hit_history_depth);
187 |             history_linear_depth           = hit_history_linear_depth;
188 |             reprojection_uv                = hit_reprojection_uv;
189 |             reprojection                   = hit_history;
190 |         } else {
191 |             // Keep the surface reprojection only if its radiance is close to the local mean, relative to the local variance.
192 |             if (dot2(surface_history - local_neighborhood.mean) <
193 |                 FFX_DNSR_REFLECTIONS_REPROJECT_SURFACE_DISCARD_VARIANCE_WEIGHT * length(local_neighborhood.variance)) {
194 |                 history_normal                     = surface_normal;
195 |                 float surface_history_depth        = FFX_DNSR_Reflections_SampleDepthHistory(surface_reprojection_uv);
196 |                 float surface_history_linear_depth = FFX_DNSR_Reflections_GetLinearDepth(surface_reprojection_uv, surface_history_depth);
197 |                 history_linear_depth               = surface_history_linear_depth;
198 |                 reprojection_uv                    = surface_reprojection_uv;
199 |                 reprojection                       = surface_history;
200 |             } else {
201 |                 disocclusion_factor = 0.0; // History rejected; reprojection_uv and reprojection keep the defaults set at the top.
202 |                 return;
203 |             }
204 |         }
205 |     }
206 |     float depth        = FFX_DNSR_Reflections_LoadDepth(dispatch_thread_id);
207 |     float linear_depth = FFX_DNSR_Reflections_GetLinearDepth(uv, depth);
208 |     // Determine disocclusion factor based on history
209 |     disocclusion_factor = FFX_DNSR_Reflections_GetDisocclusionFactor(normal, history_normal, linear_depth, history_linear_depth);
210 | 
211 |     if (disocclusion_factor > FFX_DNSR_REFLECTIONS_DISOCCLUSION_THRESHOLD) // Early out, good enough
212 |         return;
213 | 
214 |     // Try to find the closest sample in the vicinity if we are not convinced of a disocclusion
215 |     if (disocclusion_factor < FFX_DNSR_REFLECTIONS_DISOCCLUSION_THRESHOLD) {
216 |         float2    closest_uv    = reprojection_uv;
217 |         float2    dudv          = 1.0 / float2(screen_size);
218 |         const int search_radius = 1;
219 |         for (int y = -search_radius; y <= search_radius; y++) {
220 |             for (int x = -search_radius; x <= search_radius; x++) {
221 |                 float2      sample_uv           = reprojection_uv + float2(x, y) * dudv;
222 |                 min16float3 sample_normal       = FFX_DNSR_Reflections_SampleWorldSpaceNormalHistory(sample_uv);
223 |                 float       sample_depth        = FFX_DNSR_Reflections_SampleDepthHistory(sample_uv);
224 |                 float       sample_linear_depth = FFX_DNSR_Reflections_GetLinearDepth(sample_uv, sample_depth);
225 |                 min16float  weight              = FFX_DNSR_Reflections_GetDisocclusionFactor(normal, sample_normal, linear_depth, sample_linear_depth);
226 |                 if (weight > disocclusion_factor) {
227 |                     disocclusion_factor = weight;
228 |                     closest_uv          = sample_uv;
229 |                 }
230 |             }
231 |         }
232 |         reprojection_uv = closest_uv; // Commit only after the search so every tap is offset from the same center.
233 |         reprojection    = FFX_DNSR_Reflections_SampleRadianceHistory(reprojection_uv);
234 |     }
235 | 
236 |     // Rare slow path - triggered only on the edges.
237 |     // Try to get rid of potential leaks at bilinear interpolation level.
238 |     if (disocclusion_factor < FFX_DNSR_REFLECTIONS_DISOCCLUSION_THRESHOLD) {
239 |         // If we've got a discarded history, try to construct a better sample out of 2x2 interpolation neighborhood
240 |         // Helps quite a bit on the edges in movement
241 |         float       uvx                    = frac(float(screen_size.x) * reprojection_uv.x + 0.5);
242 |         float       uvy                    = frac(float(screen_size.y) * reprojection_uv.y + 0.5);
243 |         int2        reproject_texel_coords = int2(screen_size * reprojection_uv - 0.5);
244 |         min16float3 reprojection00         = FFX_DNSR_Reflections_LoadRadianceHistory(reproject_texel_coords + int2(0, 0));
245 |         min16float3 reprojection10         = FFX_DNSR_Reflections_LoadRadianceHistory(reproject_texel_coords + int2(1, 0));
246 |         min16float3 reprojection01         = FFX_DNSR_Reflections_LoadRadianceHistory(reproject_texel_coords + int2(0, 1));
247 |         min16float3 reprojection11         = FFX_DNSR_Reflections_LoadRadianceHistory(reproject_texel_coords + int2(1, 1));
248 |         min16float3 normal00               = FFX_DNSR_Reflections_LoadWorldSpaceNormalHistory(reproject_texel_coords + int2(0, 0));
249 |         min16float3 normal10               = FFX_DNSR_Reflections_LoadWorldSpaceNormalHistory(reproject_texel_coords + int2(1, 0));
250 |         min16float3 normal01               = FFX_DNSR_Reflections_LoadWorldSpaceNormalHistory(reproject_texel_coords + int2(0, 1));
251 |         min16float3 normal11               = FFX_DNSR_Reflections_LoadWorldSpaceNormalHistory(reproject_texel_coords + int2(1, 1));
252 |         float       depth00                = FFX_DNSR_Reflections_GetLinearDepth(reprojection_uv, FFX_DNSR_Reflections_LoadDepthHistory(reproject_texel_coords + int2(0, 0)));
253 |         float       depth10                = FFX_DNSR_Reflections_GetLinearDepth(reprojection_uv, FFX_DNSR_Reflections_LoadDepthHistory(reproject_texel_coords + int2(1, 0)));
254 |         float       depth01                = FFX_DNSR_Reflections_GetLinearDepth(reprojection_uv, FFX_DNSR_Reflections_LoadDepthHistory(reproject_texel_coords + int2(0, 1)));
255 |         float       depth11                = FFX_DNSR_Reflections_GetLinearDepth(reprojection_uv, FFX_DNSR_Reflections_LoadDepthHistory(reproject_texel_coords + int2(1, 1)));
256 |         min16float4 w                      = 1.0;
257 |         // Initialize with occlusion weights: a texel takes part only if it scores above half the threshold
258 |         w.x = FFX_DNSR_Reflections_GetDisocclusionFactor(normal, normal00, linear_depth, depth00) > FFX_DNSR_REFLECTIONS_DISOCCLUSION_THRESHOLD / 2.0 ? 1.0 : 0.0;
259 |         w.y = FFX_DNSR_Reflections_GetDisocclusionFactor(normal, normal10, linear_depth, depth10) > FFX_DNSR_REFLECTIONS_DISOCCLUSION_THRESHOLD / 2.0 ? 1.0 : 0.0;
260 |         w.z = FFX_DNSR_Reflections_GetDisocclusionFactor(normal, normal01, linear_depth, depth01) > FFX_DNSR_REFLECTIONS_DISOCCLUSION_THRESHOLD / 2.0 ? 1.0 : 0.0;
261 |         w.w = FFX_DNSR_Reflections_GetDisocclusionFactor(normal, normal11, linear_depth, depth11) > FFX_DNSR_REFLECTIONS_DISOCCLUSION_THRESHOLD / 2.0 ? 1.0 : 0.0;
262 |         // And then mix in bilinear weights
263 |         w.x           = w.x * (1.0 - uvx) * (1.0 - uvy);
264 |         w.y           = w.y * (uvx) * (1.0 - uvy);
265 |         w.z           = w.z * (1.0 - uvx) * (uvy);
266 |         w.w           = w.w * (uvx) * (uvy);
267 |         min16float ws = max(w.x + w.y + w.z + w.w, 1.0e-3);
268 |         // normalize
269 |         w /= ws;
270 | 
271 |         min16float3 blended_normal;
272 |         float       blended_linear_depth;
273 |         reprojection         = reprojection00 * w.x + reprojection10 * w.y + reprojection01 * w.z + reprojection11 * w.w;
274 |         blended_linear_depth = depth00 * w.x + depth10 * w.y + depth01 * w.z + depth11 * w.w;
275 |         blended_normal       = normal00 * w.x + normal10 * w.y + normal01 * w.z + normal11 * w.w;
276 |         disocclusion_factor  = FFX_DNSR_Reflections_GetDisocclusionFactor(normal, blended_normal, linear_depth, blended_linear_depth);
277 |     }
278 |     disocclusion_factor = disocclusion_factor < FFX_DNSR_REFLECTIONS_DISOCCLUSION_THRESHOLD ? 0.0 : disocclusion_factor;
279 | }
280 | 
281 | void FFX_DNSR_Reflections_Reproject(int2 dispatch_thread_id, int2 group_thread_id, uint2 screen_size, float temporal_stability_factor, int max_samples) {
282 |     // Reprojects history (radiance, variance, sample count) for glossy pixels, then reduces the group's
283 |     // luminance-weighted radiance to one average per 8x8 group. temporal_stability_factor is not used here.
284 |     FFX_DNSR_Reflections_InitializeGroupSharedMemory(dispatch_thread_id, group_thread_id, screen_size);
285 |     GroupMemoryBarrierWithGroupSync();
286 | 
287 |     group_thread_id += 4; // Center threads in groupshared memory
288 | 
289 |     min16float       num_samples = 0.0;
290 |     min16float       roughness   = FFX_DNSR_Reflections_LoadRoughness(dispatch_thread_id);
291 |     min16float3      radiance    = FFX_DNSR_Reflections_LoadRadiance(dispatch_thread_id);
292 |     const min16float ray_length  = FFX_DNSR_Reflections_LoadRayLength(dispatch_thread_id);
293 | 
294 |     if (FFX_DNSR_Reflections_IsGlossyReflection(roughness)) {
295 |         min16float  disocclusion_factor;
296 |         float2      reprojection_uv;
297 |         min16float3 reprojection;
298 |         FFX_DNSR_Reflections_PickReprojection(/*in*/ dispatch_thread_id,
299 |                                               /* in */ group_thread_id,
300 |                                               /* in */ screen_size,
301 |                                               /* in */ roughness,
302 |                                               /* in */ ray_length,
303 |                                               /* out */ disocclusion_factor,
304 |                                               /* out */ reprojection_uv,
305 |                                               /* out */ reprojection);
306 |         if (all(reprojection_uv > 0.0) && all(reprojection_uv < 1.0)) { // History is only used when the picked uv lies strictly inside the screen
307 |             min16float prev_variance = FFX_DNSR_Reflections_SampleVarianceHistory(reprojection_uv);
308 |             num_samples              = FFX_DNSR_Reflections_SampleNumSamplesHistory(reprojection_uv) * disocclusion_factor;
309 |             min16float s_max_samples = max(8.0, max_samples * FFX_DNSR_REFLECTIONS_SAMPLES_FOR_ROUGHNESS(roughness));
310 |             num_samples              = min(s_max_samples, num_samples + 1);
311 |             min16float new_variance  = FFX_DNSR_Reflections_ComputeTemporalVariance(radiance.xyz, reprojection.xyz);
312 |             if (disocclusion_factor < FFX_DNSR_REFLECTIONS_DISOCCLUSION_THRESHOLD) {
313 |                 FFX_DNSR_Reflections_StoreRadianceReprojected(dispatch_thread_id, (0.0).xxx);
314 |                 FFX_DNSR_Reflections_StoreVariance(dispatch_thread_id, 1.0);
315 |                 FFX_DNSR_Reflections_StoreNumSamples(dispatch_thread_id, 1.0);
316 |             } else {
317 |                 min16float variance_mix = lerp(new_variance, prev_variance, 1.0 / num_samples); // NOTE(review): prev_variance gets weight 1/num_samples, so its share shrinks as samples accumulate - confirm intended
318 |                 FFX_DNSR_Reflections_StoreRadianceReprojected(dispatch_thread_id, reprojection);
319 |                 FFX_DNSR_Reflections_StoreVariance(dispatch_thread_id, variance_mix);
320 |                 FFX_DNSR_Reflections_StoreNumSamples(dispatch_thread_id, num_samples);
321 |                 // Mix in reprojection for radiance mip computation
322 |                 radiance = lerp(radiance, reprojection, 0.3);
323 |             }
324 |         } else {
325 |             FFX_DNSR_Reflections_StoreRadianceReprojected(dispatch_thread_id, (0.0).xxx);
326 |             FFX_DNSR_Reflections_StoreVariance(dispatch_thread_id, 1.0);
327 |             FFX_DNSR_Reflections_StoreNumSamples(dispatch_thread_id, 1.0);
328 |         }
329 |     }
330 | 
331 |     // Downsample 8x8 -> 1 radiance using groupshared memory
332 |     // Initialize groupshared array for downsampling
333 |     min16float weight = FFX_DNSR_Reflections_GetLuminanceWeight(radiance.xyz);
334 |     radiance.xyz *= weight;
335 |     if (any(dispatch_thread_id >= screen_size) || any(isinf(radiance)) || any(isnan(radiance)) || weight > 1.0e3) {
336 |         radiance = (0.0).xxx; // Off-screen or non-finite samples contribute nothing to the average
337 |         weight   = 0.0;
338 |     }
339 | 
340 |     group_thread_id -= 4; // Undo the centering offset: the reduction below indexes groupshared memory by the raw group thread id
341 | 
342 |     FFX_DNSR_Reflections_StoreInGroupSharedMemory(group_thread_id, min16float4(radiance.xyz, weight));
343 |     GroupMemoryBarrierWithGroupSync();
344 | 
345 |     for (int i = 2; i <= 8; i = i * 2) { // Each pass sums 2x2 partial sums that sit i/2 cells apart
346 |         int ox = group_thread_id.x * i;
347 |         int oy = group_thread_id.y * i;
348 |         int ix = group_thread_id.x * i + i / 2;
349 |         int iy = group_thread_id.y * i + i / 2;
350 |         if (ix < 8 && iy < 8) {
351 |             min16float4 rad_weight00 = FFX_DNSR_Reflections_LoadFromGroupSharedMemoryRaw(int2(ox, oy));
352 |             min16float4 rad_weight10 = FFX_DNSR_Reflections_LoadFromGroupSharedMemoryRaw(int2(ox, iy));
353 |             min16float4 rad_weight01 = FFX_DNSR_Reflections_LoadFromGroupSharedMemoryRaw(int2(ix, oy));
354 |             min16float4 rad_weight11 = FFX_DNSR_Reflections_LoadFromGroupSharedMemoryRaw(int2(ix, iy));
355 |             min16float4 sum          = rad_weight00 + rad_weight01 + rad_weight10 + rad_weight11;
356 |             FFX_DNSR_Reflections_StoreInGroupSharedMemory(int2(ox, oy), sum);
357 |         }
358 |         GroupMemoryBarrierWithGroupSync();
359 |     }
360 | 
361 |     if (all(group_thread_id == 0)) { // One thread per group writes the weighted average
362 |         min16float4 sum          = FFX_DNSR_Reflections_LoadFromGroupSharedMemoryRaw(int2(0, 0));
363 |         min16float  weight_acc   = max(sum.w, 1.0e-3);
364 |         float3      radiance_avg = sum.xyz / weight_acc;
365 |         FFX_DNSR_Reflections_StoreAverageRadiance(dispatch_thread_id.xy / 8, radiance_avg);
366 |     }
367 | }
368 | 
369 | #endif // FFX_DNSR_REFLECTIONS_REPROJECT


--------------------------------------------------------------------------------
/ffx-reflection-dnsr/ffx_denoiser_reflections_resolve_temporal.h:
--------------------------------------------------------------------------------
  1 | /**********************************************************************
  2 | Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
  3 | 
  4 | Permission is hereby granted, free of charge, to any person obtaining a copy
  5 | of this software and associated documentation files (the "Software"), to deal
  6 | in the Software without restriction, including without limitation the rights
  7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8 | copies of the Software, and to permit persons to whom the Software is
  9 | furnished to do so, subject to the following conditions:
 10 | 
 11 | The above copyright notice and this permission notice shall be included in
 12 | all copies or substantial portions of the Software.
 13 | 
 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 20 | THE SOFTWARE.
 21 | ********************************************************************/
 22 | 
 23 | #ifndef FFX_DNSR_REFLECTIONS_RESOLVE_TEMPORAL
 24 | #define FFX_DNSR_REFLECTIONS_RESOLVE_TEMPORAL
 25 | 
 26 | #define FFX_DNSR_REFLECTIONS_ESTIMATES_LOCAL_NEIGHBORHOOD
 27 | #include "ffx_denoiser_reflections_common.h"
 28 | 
 29 | groupshared uint g_ffx_dnsr_shared_0[16][16]; // Holds packed radiance.xy (see FFX_DNSR_Reflections_StoreInGroupSharedMemory).
 30 | groupshared uint g_ffx_dnsr_shared_1[16][16]; // Holds packed radiance.zz.
 31 | 
 32 | struct FFX_DNSR_Reflections_NeighborhoodSample { // One cell read back from groupshared memory.
 33 |     min16float3 radiance; // xyz lanes of the unpacked value.
 34 | };
 35 | 
 36 | FFX_DNSR_Reflections_NeighborhoodSample FFX_DNSR_Reflections_LoadFromGroupSharedMemory(int2 idx) {
 37 |     // Reassemble the two packed words for this cell and keep the xyz lanes as radiance.
 38 |     const uint word_0 = g_ffx_dnsr_shared_0[idx.y][idx.x];
 39 |     const uint word_1 = g_ffx_dnsr_shared_1[idx.y][idx.x];
 40 |     FFX_DNSR_Reflections_NeighborhoodSample result;
 41 |     result.radiance = FFX_DNSR_Reflections_UnpackFloat16_4(uint2(word_0, word_1)).xyz;
 42 |     return result;
 43 | }
 44 | 
 45 | struct FFX_DNSR_Reflections_Moments { // Per-channel statistics returned by FFX_DNSR_Reflections_EstimateLocalNeighborhoodInGroup.
 46 |     min16float3 mean;     // Kernel-weighted mean of the neighborhood radiance.
 47 |     min16float3 variance; // abs(weighted mean of squares - mean * mean), per channel.
 48 | };
 49 | 
 50 | FFX_DNSR_Reflections_Moments FFX_DNSR_Reflections_EstimateLocalNeighborhoodInGroup(int2 group_thread_id) {
 51 |     // Kernel-weighted per-channel mean and variance of the radiance cached in groupshared memory around group_thread_id.
 52 |     FFX_DNSR_Reflections_Moments moments;
 53 |     moments.mean          = 0;
 54 |     moments.variance      = 0; // Accumulates the weighted sum of squares until the final subtraction.
 55 |     min16float weight_sum = 0;
 56 |     for (int dy = -FFX_DNSR_REFLECTIONS_LOCAL_NEIGHBORHOOD_RADIUS; dy <= FFX_DNSR_REFLECTIONS_LOCAL_NEIGHBORHOOD_RADIUS; ++dy) {
 57 |         for (int dx = -FFX_DNSR_REFLECTIONS_LOCAL_NEIGHBORHOOD_RADIUS; dx <= FFX_DNSR_REFLECTIONS_LOCAL_NEIGHBORHOOD_RADIUS; ++dx) {
 58 |             min16float  tap_weight = FFX_DNSR_Reflections_LocalNeighborhoodKernelWeight(dx) * FFX_DNSR_Reflections_LocalNeighborhoodKernelWeight(dy);
 59 |             min16float3 tap        = FFX_DNSR_Reflections_LoadFromGroupSharedMemory(group_thread_id + int2(dx, dy)).radiance;
 60 |             weight_sum            += tap_weight;
 61 |             moments.mean          += tap * tap_weight;
 62 |             moments.variance      += tap * tap * tap_weight;
 63 |         }
 64 |     }
 65 |     moments.mean     /= weight_sum;
 66 |     moments.variance /= weight_sum;
 67 | 
 68 |     moments.variance = abs(moments.variance - moments.mean * moments.mean); // |E[x^2] - E[x]^2|
 69 |     return moments;
 70 | }
 71 | 
 72 | void FFX_DNSR_Reflections_StoreInGroupSharedMemory(int2 group_thread_id, min16float3 radiance) { // Packs radiance into the two groupshared words of one cell.
 73 |     g_ffx_dnsr_shared_1[group_thread_id.y][group_thread_id.x] = FFX_DNSR_Reflections_PackFloat16(radiance.zz); // z is written to both halves
 74 |     g_ffx_dnsr_shared_0[group_thread_id.y][group_thread_id.x] = FFX_DNSR_Reflections_PackFloat16(radiance.xy);
 75 | }
 76 | 
 77 | void FFX_DNSR_Reflections_LoadNeighborhood(int2 pixel_coordinate, out min16float3 radiance) { radiance = FFX_DNSR_Reflections_LoadRadiance(pixel_coordinate); } // Thin wrapper: writes the radiance loaded at pixel_coordinate to the out parameter.
 78 | 
 79 | void FFX_DNSR_Reflections_InitializeGroupSharedMemory(int2 dispatch_thread_id, int2 group_thread_id, int2 screen_size) {
 80 |     // Each thread fills four cells of the 16x16 groupshared region, one per 8x8 block.
 81 |     // screen_size is not used by this function.
 82 |     int2 block_offset[4] = {int2(0, 0), int2(8, 0), int2(0, 8), int2(8, 8)};
 83 | 
 84 |     // This thread's pixel in the upper-left block: 4 pixels up and to the left of its own pixel.
 85 |     int2 first_pixel = dispatch_thread_id - 4;
 86 | 
 87 |     // Issue all four loads into registers first ...
 88 |     min16float3 fetched[4];
 89 |     int         block;
 90 |     for (block = 0; block < 4; ++block) {
 91 |         FFX_DNSR_Reflections_LoadNeighborhood(first_pixel + block_offset[block], fetched[block]);
 92 |     }
 93 | 
 94 |     // ... and only then write them to groupshared memory.
 95 |     for (block = 0; block < 4; ++block) {
 96 |         FFX_DNSR_Reflections_StoreInGroupSharedMemory(group_thread_id + block_offset[block], fetched[block]);
 97 |     }
 98 | }
 99 | 
100 | void FFX_DNSR_Reflections_ResolveTemporal(int2 dispatch_thread_id, int2 group_thread_id, uint2 screen_size, float2 inv_screen_size, float history_clip_weight) { // Blends the current radiance with clipped reprojected history; inv_screen_size is not used here.
101 |     FFX_DNSR_Reflections_InitializeGroupSharedMemory(dispatch_thread_id, group_thread_id, screen_size);
102 |     GroupMemoryBarrierWithGroupSync();
103 | 
104 |     group_thread_id += 4; // Center threads in groupshared memory
105 | 
106 |     FFX_DNSR_Reflections_NeighborhoodSample center       = FFX_DNSR_Reflections_LoadFromGroupSharedMemory(group_thread_id);
107 |     min16float3                             new_signal   = center.radiance;
108 |     min16float                              roughness    = FFX_DNSR_Reflections_LoadRoughness(dispatch_thread_id);
109 |     min16float                              new_variance = FFX_DNSR_Reflections_LoadVariance(dispatch_thread_id);
110 |     
111 |     if (FFX_DNSR_Reflections_IsGlossyReflection(roughness)) { // Non-glossy pixels skip the blend and are stored exactly as loaded above
112 |         min16float  num_samples  = FFX_DNSR_Reflections_LoadNumSamples(dispatch_thread_id);
113 |         float2      uv8          = (float2(dispatch_thread_id.xy) + (0.5).xx) / FFX_DNSR_Reflections_RoundUp8(screen_size);
114 |         min16float3 avg_radiance = FFX_DNSR_Reflections_SampleAverageRadiance(uv8);
115 | 
116 |         min16float3                  old_signal         = FFX_DNSR_Reflections_LoadRadianceReprojected(dispatch_thread_id);
117 |         FFX_DNSR_Reflections_Moments local_neighborhood = FFX_DNSR_Reflections_EstimateLocalNeighborhoodInGroup(group_thread_id);
118 |         // Clip history based on the current local statistics
119 |         min16float3                  color_std          = (sqrt(local_neighborhood.variance.xyz) + length(local_neighborhood.mean.xyz - avg_radiance)) * history_clip_weight * 1.4;
120 |                             local_neighborhood.mean.xyz = lerp(local_neighborhood.mean.xyz, avg_radiance, 0.2); // Applied after color_std, which was computed from the unmodified mean
121 |         min16float3                  radiance_min       = local_neighborhood.mean.xyz - color_std;
122 |         min16float3                  radiance_max       = local_neighborhood.mean.xyz + color_std;
123 |         min16float3                  clipped_old_signal = FFX_DNSR_Reflections_ClipAABB(radiance_min, radiance_max, old_signal.xyz);
124 |         min16float                   accumulation_speed = 1.0 / max(num_samples, 1.0);
125 |         min16float                   weight             = (1.0 - accumulation_speed); // History weight: 0 at one sample, approaching 1 as num_samples grows
126 |         // Blend with average for small sample count
127 |         new_signal.xyz                                  = lerp(new_signal.xyz, avg_radiance, 1.0 / max(num_samples + 1.0f, 1.0));
128 |         // Clip outliers
129 |         { // Inner scope: these bounds are centered on avg_radiance and shadow the outer radiance_min/radiance_max
130 |             min16float3                  radiance_min       = avg_radiance.xyz - color_std * 1.0;
131 |             min16float3                  radiance_max       = avg_radiance.xyz + color_std * 1.0;
132 |             new_signal.xyz                                  = FFX_DNSR_Reflections_ClipAABB(radiance_min, radiance_max, new_signal.xyz);
133 |         }
134 |         // Blend with history
135 |         new_signal                                      = lerp(new_signal, clipped_old_signal, weight);
136 |         new_variance                                    = lerp(FFX_DNSR_Reflections_ComputeTemporalVariance(new_signal.xyz, clipped_old_signal.xyz), new_variance, weight); // Uses new_signal after it has been blended with history
137 |         if (any(isinf(new_signal)) || any(isnan(new_signal)) || any(isinf(new_variance)) || any(isnan(new_variance))) { // Store zeros rather than inf/NaN
138 |             new_signal   = 0.0;
139 |             new_variance = 0.0;
140 |         }
141 | 
142 |     }
143 |     FFX_DNSR_Reflections_StoreTemporalAccumulation(dispatch_thread_id, new_signal, new_variance);
144 | }
145 | 
146 | #endif // FFX_DNSR_REFLECTIONS_RESOLVE_TEMPORAL


--------------------------------------------------------------------------------
/ffx-shadows-dnsr/ffx_denoiser_shadows_filter.h:
--------------------------------------------------------------------------------
  1 | /**********************************************************************
  2 | Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
  3 | 
  4 | Permission is hereby granted, free of charge, to any person obtaining a copy
  5 | of this software and associated documentation files (the "Software"), to deal
  6 | in the Software without restriction, including without limitation the rights
  7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8 | copies of the Software, and to permit persons to whom the Software is
  9 | furnished to do so, subject to the following conditions:
 10 | 
 11 | The above copyright notice and this permission notice shall be included in
 12 | all copies or substantial portions of the Software.
 13 | 
 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 20 | THE SOFTWARE.
 21 | ********************************************************************/
 22 | 
 23 | #ifndef FFX_DNSR_SHADOWS_FILTER_HLSL
 24 | #define FFX_DNSR_SHADOWS_FILTER_HLSL
 25 | 
 26 | #include "ffx_denoiser_shadows_util.h"
 27 | 
 28 | groupshared uint g_FFX_DNSR_Shadows_shared_input[16][16]; // Two-component input packed into one uint per cell.
 29 | groupshared float g_FFX_DNSR_Shadows_shared_depth[16][16]; // Depth, stored unpacked.
 30 | groupshared uint g_FFX_DNSR_Shadows_shared_normals_xy[16][16]; // normals.xy packed into one uint.
 31 | groupshared uint g_FFX_DNSR_Shadows_shared_normals_zw[16][16]; // normals.z in the low half, 0 in the high half.
 32 | 
 33 | uint FFX_DNSR_Shadows_PackFloat16(float16_t2 v)
 34 | {
 35 |     // v.x goes in the low 16 bits, v.y in the high 16 bits.
 36 |     return f32tof16(float(v.x)) | (f32tof16(float(v.y)) << 16);
 37 | }
 38 | 
 39 | float16_t2 FFX_DNSR_Shadows_UnpackFloat16(uint a)
 40 | {
 41 |     // The low 16 bits hold x, the high 16 bits hold y.
 42 |     const uint2 halves = uint2(a & 0xFFFF, a >> 16);
 43 |     return float16_t2(f16tof32(halves));
 44 | }
 45 | 
 46 | float16_t2 FFX_DNSR_Shadows_LoadInputFromGroupSharedMemory(int2 idx)
 47 | { // Reads the packed input at cell idx (x = column, y = row) and unpacks it.
 48 |     return FFX_DNSR_Shadows_UnpackFloat16(g_FFX_DNSR_Shadows_shared_input[idx.y][idx.x]);
 49 | }
 50 | 
 51 | float FFX_DNSR_Shadows_LoadDepthFromGroupSharedMemory(int2 idx)
 52 | { // Reads the depth stored at cell idx (x = column, y = row).
 53 |     return g_FFX_DNSR_Shadows_shared_depth[idx.y][idx.x];
 54 | }
 55 | 
 56 | float16_t3 FFX_DNSR_Shadows_LoadNormalsFromGroupSharedMemory(int2 idx)
 57 | {
 58 |     // xy come from one packed word; z is the low half of the other.
 59 |     const float16_t2 packed_xy = FFX_DNSR_Shadows_UnpackFloat16(g_FFX_DNSR_Shadows_shared_normals_xy[idx.y][idx.x]);
 60 |     const float16_t2 packed_zw = FFX_DNSR_Shadows_UnpackFloat16(g_FFX_DNSR_Shadows_shared_normals_zw[idx.y][idx.x]);
 61 |     return float16_t3(packed_xy, packed_zw.x);
 62 | }
 63 | 
 64 | void FFX_DNSR_Shadows_StoreInGroupSharedMemory(int2 idx, float16_t3 normals, float16_t2 input, float depth)
 65 | { // Writes one cell: normals use two packed words, input one packed word, depth stays a full float.
 66 |     g_FFX_DNSR_Shadows_shared_normals_xy[idx.y][idx.x] = FFX_DNSR_Shadows_PackFloat16(normals.xy);
 67 |     g_FFX_DNSR_Shadows_shared_normals_zw[idx.y][idx.x] = FFX_DNSR_Shadows_PackFloat16(float16_t2(normals.z, 0));
 68 |     g_FFX_DNSR_Shadows_shared_input[idx.y][idx.x] = FFX_DNSR_Shadows_PackFloat16(input);
 69 |     g_FFX_DNSR_Shadows_shared_depth[idx.y][idx.x] = depth;
 70 | }
 71 | 
 72 | void FFX_DNSR_Shadows_LoadWithOffset(int2 did, int2 offset, out float16_t3 normals, out float16_t2 input, out float depth)
 73 | {
 74 |     // Clamp the offset pixel into [0, dimensions - 1] so reads past the border use the edge pixel.
 75 |     const int2 pixel = clamp(did + offset, int2(0, 0), FFX_DNSR_Shadows_GetBufferDimensions() - 1);
 76 | 
 77 |     depth   = FFX_DNSR_Shadows_ReadDepth(pixel);
 78 |     input   = FFX_DNSR_Shadows_ReadInput(pixel);
 79 |     normals = FFX_DNSR_Shadows_ReadNormals(pixel);
 80 | }
 81 | 
 82 | // Stores one texel into the groupshared tile at gtid + offset.
 83 | void FFX_DNSR_Shadows_StoreWithOffset(int2 gtid, int2 offset, float16_t3 normals, float16_t2 input, float depth)
 84 | {
 85 |     const int2 slot = gtid + offset;
 86 |     FFX_DNSR_Shadows_StoreInGroupSharedMemory(slot, normals, input, depth);
 87 | }
 87 | 
 88 | // Fills the 16x16 groupshared tile. Each thread copies four texels:
 89 | //   X A
 90 | //   B C
 91 | // X lies 4 texels up and left of the thread's pixel `did`; A, B and C are 8 texels
 92 | // further right, down and diagonally. They are stored at gtid plus the same offsets.
 93 | // All four loads are issued before the first store.
 94 | void FFX_DNSR_Shadows_InitializeGroupSharedMemory(int2 did, int2 gtid)
 95 | {
 96 |     const int2 quadrant_offsets[4] =
 97 |     {
 98 |         int2(0, 0), // X
 99 |         int2(8, 0), // A
100 |         int2(0, 8), // B
101 |         int2(8, 8)  // C
102 |     };
103 | 
104 |     float16_t3 quadrant_normals[4];
105 |     float16_t2 quadrant_input[4];
106 |     float quadrant_depth[4];
107 | 
108 |     const int2 origin = did - 4;
109 | 
110 |     [unroll]
111 |     for (int load_index = 0; load_index < 4; ++load_index)
112 |     {
113 |         FFX_DNSR_Shadows_LoadWithOffset(origin, quadrant_offsets[load_index], quadrant_normals[load_index], quadrant_input[load_index], quadrant_depth[load_index]);
114 |     }
115 | 
116 |     [unroll]
117 |     for (int store_index = 0; store_index < 4; ++store_index)
118 |     {
119 |         FFX_DNSR_Shadows_StoreWithOffset(gtid, quadrant_offsets[store_index], quadrant_normals[store_index], quadrant_input[store_index], quadrant_depth[store_index]);
120 |     }
121 | }
125 | 
126 | // Similarity weight of two shadow values: exp(-|x1 - x2| / sigma).
127 | float FFX_DNSR_Shadows_GetShadowSimilarity(float x1, float x2, float sigma)
128 | {
129 |     const float shadow_delta = abs(x1 - x2);
130 |     return exp(-shadow_delta / sigma);
131 | }
130 | 
131 | // Similarity weight of two depth values: exp(-|x1 - x2| / sigma).
132 | float FFX_DNSR_Shadows_GetDepthSimilarity(float x1, float x2, float sigma)
133 | {
134 |     const float depth_delta = abs(x1 - x2);
135 |     return exp(-depth_delta / sigma);
136 | }
135 | 
136 | // Similarity weight of two normals: the dot product clamped to [0, 1], raised to the 32nd power.
137 | float FFX_DNSR_Shadows_GetNormalSimilarity(float3 x1, float3 x2)
138 | {
139 |     const float alignment = saturate(dot(x1, x2));
140 |     return pow(alignment, 32.0f);
141 | }
140 | 
141 | // Transforms the pixel center of `did` together with `depth` by the inverse projection
142 | // matrix and returns |z / w| of the result.
143 | float FFX_DNSR_Shadows_GetLinearDepth(uint2 did, float depth)
144 | {
145 |     const float2 pixel_uv = (did + 0.5f) * FFX_DNSR_Shadows_GetInvBufferDimensions();
146 |     const float2 flipped_uv = float2(pixel_uv.x, 1.0f - pixel_uv.y); // uv.y runs opposite to ndc.y
147 |     const float2 ndc = 2.0f * flipped_uv - 1.0f;
148 | 
149 |     const float4 unprojected = mul(FFX_DNSR_Shadows_GetProjectionInverse(), float4(ndc, depth, 1));
150 |     return abs(unprojected.z / unprojected.w);
151 | }
149 | 
150 | // Returns the .y channel of the cached filter input, blurred with a 3x3 kernel
151 | // (center 1/4, edges 1/8, corners 1/16) centered on groupshared texel `pos`.
152 | float FFX_DNSR_Shadows_FetchFilteredVarianceFromGroupSharedMemory(int2 pos)
153 | {
154 |     // The 3x3 kernel is the outer product of this 1D kernel with itself.
155 |     // All products are exact powers of two.
156 |     const float tap_1d[2] = { 0.5f, 0.25f };
157 |     const int radius = 1;
158 | 
159 |     float filtered = 0.0f;
160 |     for (int dy = -radius; dy <= radius; ++dy)
161 |     {
162 |         for (int dx = -radius; dx <= radius; ++dx)
163 |         {
164 |             const float tap_weight = tap_1d[abs(dx)] * tap_1d[abs(dy)];
165 |             filtered += tap_weight * FFX_DNSR_Shadows_LoadInputFromGroupSharedMemory(pos + int2(dx, dy)).y;
166 |         }
167 |     }
168 |     return filtered;
169 | }
169 | // Edge-aware 3x3 filter around groupshared texel gtid, with taps `stepsize` texels apart. Overwrites weight_sum and shadow_sum with the accumulated weights and weighted inputs.
170 | void FFX_DNSR_Shadows_DenoiseFromGroupSharedMemory(uint2 did, uint2 gtid, inout float weight_sum, inout float2 shadow_sum, float depth, uint stepsize)
171 | {
172 |     // Load our center sample
173 |     const float2 shadow_center = FFX_DNSR_Shadows_LoadInputFromGroupSharedMemory(gtid);
174 |     const float3 normal_center = FFX_DNSR_Shadows_LoadNormalsFromGroupSharedMemory(gtid);
175 |     // The center sample enters the sums with weight 1; the loop below only adds the neighbors.
176 |     weight_sum = 1.0f;
177 |     shadow_sum = shadow_center;
178 |     // The filtered variance (input .y) gives the sigma of the shadow similarity; the 1e-9 bias avoids a zero sigma when the variance is 0.
179 |     const float variance = FFX_DNSR_Shadows_FetchFilteredVarianceFromGroupSharedMemory(gtid);
180 |     const float std_deviation = sqrt(max(variance + 1e-9f, 0.0f));
181 |     const float depth_center = FFX_DNSR_Shadows_GetLinearDepth(did, depth);    // linearize the depth value
182 | 
183 |     // Iterate filter kernel (with k = 1 only kernel[0] and kernel[1] are ever indexed)
184 |     const int k = 1;
185 |     const float kernel[3] = { 1.0f, 2.0f / 3.0f, 1.0f / 6.0f };
186 | 
187 |     for (int y = -k; y <= k; ++y)
188 |     {
189 |         for (int x = -k; x <= k; ++x)
190 |         {
191 |             // Position of this tap in groupshared memory (gtid_idx) and in the buffer (did_idx)
192 |             const int2 step = int2(x, y) * stepsize;
193 |             const int2 gtid_idx = gtid + step;
194 |             const int2 did_idx = did + step;
195 | 
196 |             float depth_neigh = FFX_DNSR_Shadows_LoadDepthFromGroupSharedMemory(gtid_idx);
197 |             float3 normal_neigh = FFX_DNSR_Shadows_LoadNormalsFromGroupSharedMemory(gtid_idx);
198 |             float2 shadow_neigh = FFX_DNSR_Shadows_LoadInputFromGroupSharedMemory(gtid_idx);
199 | 
200 |             float sky_pixel_multiplier = ((x == 0 && y == 0) || depth_neigh >= 1.0f || depth_neigh <= 0.0f) ? 0 : 1; // Zero weight for the center tap (already accumulated) and for sky pixels (raw depth outside (0, 1))
201 | 
202 |             // Fetch our filtering values (the raw depth was needed for the sky test above, so linearize only now)
203 |             depth_neigh = FFX_DNSR_Shadows_GetLinearDepth(did_idx, depth_neigh);
204 | 
205 |             // Evaluate the edge-stopping function
206 |             float w = kernel[abs(x)] * kernel[abs(y)];  // kernel weight
207 |             w *= FFX_DNSR_Shadows_GetShadowSimilarity(shadow_center.x, shadow_neigh.x, std_deviation);
208 |             w *= FFX_DNSR_Shadows_GetDepthSimilarity(depth_center, depth_neigh, FFX_DNSR_Shadows_GetDepthSimilaritySigma());
209 |             w *= FFX_DNSR_Shadows_GetNormalSimilarity(normal_center, normal_neigh);
210 |             w *= sky_pixel_multiplier;
211 | 
212 |             // Accumulate the filtered sample: .x is weighted by w, .y (variance) by w * w
213 |             shadow_sum += float2(w, w * w) * shadow_neigh;
214 |             weight_sum += w;
215 |         }
216 |     }
217 | }
218 | 
219 | // Caches the neighborhood in groupshared memory, filters pixel `did` if it is a shadow
220 | // receiver and returns float2(mean, variance). Non-receivers return float2(0, 0).
221 | float2 FFX_DNSR_Shadows_ApplyFilterWithPrecache(uint2 did, uint2 gtid, uint stepsize)
222 | {
223 |     float total_weight = 1.0;
224 |     float2 accumulated = 0.0;
225 | 
226 |     FFX_DNSR_Shadows_InitializeGroupSharedMemory(did, gtid);
227 |     const bool is_receiver = FFX_DNSR_Shadows_IsShadowReciever(did);
228 | 
229 |     // Every thread must have stored its texels before any thread reads the tile.
230 |     GroupMemoryBarrierWithGroupSync();
231 | 
232 |     if (is_receiver)
233 |     {
234 |         const float center_depth = FFX_DNSR_Shadows_ReadDepth(did);
235 |         const uint2 centered_gtid = gtid + 4; // Center threads in groupshared memory
236 |         FFX_DNSR_Shadows_DenoiseFromGroupSharedMemory(did, centered_gtid, total_weight, accumulated, center_depth, stepsize);
237 |     }
238 | 
239 |     const float filtered_mean = accumulated.x / total_weight;
240 |     const float filtered_variance = accumulated.y / (total_weight * total_weight);
241 |     return float2(filtered_mean, filtered_variance);
242 | }
238 | 
239 | // Reads the meta data word of the 8-pixel-wide tile column/row `gid` and decodes its two flags.
240 | void FFX_DNSR_Shadows_ReadTileMetaData(uint2 gid, out bool is_cleared, out bool all_in_light)
241 | {
242 |     const uint tiles_per_row = FFX_DNSR_Shadows_RoundedDivide(FFX_DNSR_Shadows_GetBufferDimensions().x, 8);
243 |     const uint tile_flags = FFX_DNSR_Shadows_ReadTileMetaData(gid.y * tiles_per_row + gid.x);
244 | 
245 |     is_cleared = (tile_flags & TILE_META_DATA_CLEAR_MASK) != 0;
246 |     all_in_light = (tile_flags & TILE_META_DATA_LIGHT_MASK) != 0;
247 | }
245 | 
246 | 
247 | // One filter pass for pixel `did`.
248 | //  - Tile not cleared: returns the filtered (mean, variance) and sets bWriteResults.
249 | //  - Tile cleared:     returns (all_in_light ? 1 : 0, 0) and sets bWriteResults,
250 | //                      except for pass 1 where nothing is to be written.
251 | float2 FFX_DNSR_Shadows_FilterSoftShadowsPass(uint2 gid, uint2 gtid, uint2 did, out bool bWriteResults, uint const pass, uint const stepsize)
252 | {
253 |     bool tile_cleared;
254 |     bool tile_lit;
255 |     FFX_DNSR_Shadows_ReadTileMetaData(gid, tile_cleared, tile_lit);
256 | 
257 |     float2 results = float2(0, 0);
258 |     bWriteResults = false;
259 | 
260 |     [branch]
261 |     if (!tile_cleared)
262 |     {
263 |         results = FFX_DNSR_Shadows_ApplyFilterWithPrecache(did, gtid, stepsize);
264 |         bWriteResults = true;
265 |     }
266 |     else if (pass != 1)
267 |     {
268 |         results.x = tile_lit ? 1.0 : 0.0;
269 |         bWriteResults = true;
270 |     }
271 | 
272 |     return results;
273 | }
272 | 
273 | #endif
274 | 


--------------------------------------------------------------------------------
/ffx-shadows-dnsr/ffx_denoiser_shadows_prepare.h:
--------------------------------------------------------------------------------
 1 | /**********************************************************************
 2 | Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
 3 | 
 4 | Permission is hereby granted, free of charge, to any person obtaining a copy
 5 | of this software and associated documentation files (the "Software"), to deal
 6 | in the Software without restriction, including without limitation the rights
 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 8 | copies of the Software, and to permit persons to whom the Software is
 9 | furnished to do so, subject to the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | ********************************************************************/
22 | 
23 | #ifndef FFX_DNSR_SHADOWS_PREPARESHADOWMASK_HLSL
24 | #define FFX_DNSR_SHADOWS_PREPARESHADOWMASK_HLSL
25 | 
26 | #include "ffx_denoiser_shadows_util.h"
27 | 
28 | // Builds the bit mask of one 8x4 tile: every lane whose pixel hits the light contributes
29 | // its pixel bit, the bits are OR-ed across the wave and written to the tile's mask slot.
30 | void FFX_DNSR_Shadows_CopyResult(uint2 gtid, uint2 gid)
31 | {
32 |     const uint2 pixels_per_tile = uint2(8, 4);
33 |     const uint2 did = gid * pixels_per_tile + gtid;
34 |     const uint tile_slot = FFX_DNSR_Shadows_LinearTileIndex(gid, FFX_DNSR_Shadows_GetBufferDimensions().x);
35 | 
36 |     const uint lit_bit = FFX_DNSR_Shadows_HitsLight(did, gtid, gid) ? FFX_DNSR_Shadows_GetBitMaskFromPixelPosition(did) : 0;
37 |     const uint tile_mask = WaveActiveBitOr(lit_bit);
38 |     FFX_DNSR_Shadows_WriteMask(tile_slot, tile_mask);
39 | }
36 |  
37 | // Processes a 4x4 block of 8x4 tiles per thread group. Tile ids are clamped to the
38 | // last valid tile, so groups at the right or bottom border revisit the border tiles.
39 | void FFX_DNSR_Shadows_PrepareShadowMask(uint2 gtid, uint2 gid)
40 | {
41 |     const uint2 first_tile = gid * 4;
42 |     const uint2 last_tile = (FFX_DNSR_Shadows_GetBufferDimensions() + uint2(7, 3)) / uint2(8, 4) - 1;
43 | 
44 |     // Visit the 16 tiles column by column (x is the slow index, y the fast one).
45 |     for (uint n = 0; n < 16; ++n)
46 |     {
47 |         const uint2 tile_in_block = uint2(n / 4, n % 4);
48 |         const uint2 tile_id = clamp(first_tile + tile_in_block, 0, last_tile);
49 |         FFX_DNSR_Shadows_CopyResult(gtid, tile_id);
50 |     }
51 | }
52 | 
53 | #endif
54 | 


--------------------------------------------------------------------------------
/ffx-shadows-dnsr/ffx_denoiser_shadows_tileclassification.h:
--------------------------------------------------------------------------------
  1 | /**********************************************************************
  2 | Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
  3 | 
  4 | Permission is hereby granted, free of charge, to any person obtaining a copy
  5 | of this software and associated documentation files (the "Software"), to deal
  6 | in the Software without restriction, including without limitation the rights
  7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8 | copies of the Software, and to permit persons to whom the Software is
  9 | furnished to do so, subject to the following conditions:
 10 | 
 11 | The above copyright notice and this permission notice shall be included in
 12 | all copies or substantial portions of the Software.
 13 | 
 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 20 | THE SOFTWARE.
 21 | ********************************************************************/
 22 | 
 23 | #ifndef FFX_DNSR_SHADOWS_TILECLASSIFICATION_HLSL
 24 | #define FFX_DNSR_SHADOWS_TILECLASSIFICATION_HLSL
 25 | 
 26 | #include "ffx_denoiser_shadows_util.h"
 27 | 
 28 | // Set to 1 by any thread whose value is false (used only by the groupshared fallback below).
 29 | groupshared int g_FFX_DNSR_Shadows_false_count;
 30 | 
 31 | // Returns true if `val` is true for every thread of the thread group.
 32 | // When a wave has exactly 64 lanes a single wave intrinsic is used; otherwise the
 33 | // result is gathered through groupshared memory.
 34 | bool FFX_DNSR_Shadows_ThreadGroupAllTrue(bool val)
 35 | {
 36 |     const uint threads_per_group = 64;
 37 |     bool all_true;
 38 | 
 39 |     if (WaveGetLaneCount() != threads_per_group)
 40 |     {
 41 |         // Wait until nobody reads the flag any more, reset it, then let every
 42 |         // thread with a false value raise it.
 43 |         GroupMemoryBarrierWithGroupSync();
 44 |         g_FFX_DNSR_Shadows_false_count = 0;
 45 |         GroupMemoryBarrierWithGroupSync();
 46 |         if (!val)
 47 |         {
 48 |             g_FFX_DNSR_Shadows_false_count = 1;
 49 |         }
 50 |         GroupMemoryBarrierWithGroupSync();
 51 |         all_true = g_FFX_DNSR_Shadows_false_count == 0;
 52 |     }
 53 |     else
 54 |     {
 55 |         all_true = WaveActiveAllTrue(val);
 56 |     }
 57 | 
 58 |     return all_true;
 59 | }
 46 | 
 47 | // Combines the ray traced shadow masks of all tiles around the 8x8 pixel block `gid`.
 48 | //
 49 | // The spatial passes can reach a total region of 1+2+4 = 7 pixels around each block.
 50 | // A mask covers 8x4 pixels, so the block itself spans two masks vertically (tile rows
 51 | // j = 0 and j = 1 below). The search adds one mask column to the left and right and two
 52 | // mask rows above and below: 3 x 6 masks in total. Tiles outside the buffer are clamped
 53 | // to the nearest valid tile.
 54 | //
 55 | // all_in_light  - every bit of every visited mask is set.
 56 | // all_in_shadow - no bit of any visited mask is set.
 57 | void FFX_DNSR_Shadows_SearchSpatialRegion(uint2 gid, out bool all_in_light, out bool all_in_shadow)
 58 | {
 59 |     // Everything below depends on gid only; the original authors expect scalar ops here.
 60 |     const uint2 base_tile = FFX_DNSR_Shadows_GetTileIndexFromPixelPosition(gid * int2(8, 8));
 61 |     const int2 last_tile = int2(FFX_DNSR_Shadows_RoundedDivide(FFX_DNSR_Shadows_GetBufferDimensions().x, 8), FFX_DNSR_Shadows_RoundedDivide(FFX_DNSR_Shadows_GetBufferDimensions().y, 4)) - 1;
 62 | 
 63 |     uint union_of_masks = 0;
 64 |     uint intersection_of_masks = 0xFFFFFFFF;
 65 | 
 66 |     for (int tile_dy = -2; tile_dy <= 3; ++tile_dy)
 67 |     {
 68 |         for (int tile_dx = -1; tile_dx <= 1; ++tile_dx)
 69 |         {
 70 |             // Go through a signed intermediate so that tiles left of / above the buffer clamp to 0.
 71 |             int2 neighbor_tile = base_tile + int2(tile_dx, tile_dy);
 72 |             neighbor_tile = clamp(neighbor_tile, 0, last_tile);
 73 | 
 74 |             const uint neighbor_slot = FFX_DNSR_Shadows_LinearTileIndex(neighbor_tile, FFX_DNSR_Shadows_GetBufferDimensions().x);
 75 |             const uint neighbor_mask = FFX_DNSR_Shadows_ReadRaytracedShadowMask(neighbor_slot);
 76 | 
 77 |             union_of_masks |= neighbor_mask;
 78 |             intersection_of_masks &= neighbor_mask;
 79 |         }
 80 |     }
 81 | 
 82 |     all_in_light = intersection_of_masks == 0xFFFFFFFFu;
 83 |     all_in_shadow = union_of_masks == 0u;
 84 | }
 85 | 
 86 | // Returns |z / w| after transforming (ndc.xy of the pixel center of `did`, depth, 1)
 87 | // with the inverse projection matrix.
 88 | float FFX_DNSR_Shadows_GetLinearDepth(uint2 did, float depth)
 89 | {
 90 |     const float2 inv_dims = FFX_DNSR_Shadows_GetInvBufferDimensions();
 91 |     const float2 center_uv = (did + 0.5f) * inv_dims;
 92 | 
 93 |     // Flip y before expanding [0, 1] to [-1, 1].
 94 |     const float2 ndc = 2.0f * float2(center_uv.x, 1.0f - center_uv.y) - 1.0f;
 95 | 
 96 |     const float4 view_pos = mul(FFX_DNSR_Shadows_GetProjectionInverse(), float4(ndc, depth, 1));
 97 |     return abs(view_pos.z / view_pos.w);
 98 | }
 94 | // Returns true if the history of pixel `did` cannot be used: its previous UV (uv - velocity) is not strictly inside (0, 1), or the previous depth differs too much from the reprojected one.
 95 | bool FFX_DNSR_Shadows_IsDisoccluded(uint2 did, float depth, float2 velocity)
 96 | {
 97 |     const int2 dims = FFX_DNSR_Shadows_GetBufferDimensions();
 98 |     const float2 texel_size = FFX_DNSR_Shadows_GetInvBufferDimensions();
 99 |     const float2 uv = (did + 0.5f) * texel_size;
100 |     const float2 ndc = (2.0f * uv - 1.0f) * float2(1.0f, -1.0f);
101 |     const float2 previous_uv = uv - velocity;
102 |     // Pixels whose previous UV lies outside the buffer keep the default: disoccluded.
103 |     bool is_disoccluded = true;
104 |     if (all(previous_uv > 0.0) && all(previous_uv < 1.0))
105 |     {
106 |         // Read the center values
107 |         float3 normal = FFX_DNSR_Shadows_ReadNormals(did);
108 |         // Transform the current position with the reprojection matrix (presumably into the previous frame's clip space - confirm with the caller).
109 |         float4 clip_space = mul(FFX_DNSR_Shadows_GetReprojectionMatrix(), float4(ndc, depth, 1.0f));
110 |         clip_space /= clip_space.w; // perspective divide
111 | 
112 |         // How aligned with the view vector? (the more Z aligned, the higher the depth errors); z_alignment = (1 - dot(view_direction, normal))^8
113 |         const float4 homogeneous = mul(FFX_DNSR_Shadows_GetViewProjectionInverse(), float4(ndc, depth, 1.0f));
114 |         const float3 world_position = homogeneous.xyz / homogeneous.w;  // perspective divide
115 |         const float3 view_direction = normalize(FFX_DNSR_Shadows_GetEye().xyz - world_position);
116 |         float z_alignment = 1.0f - dot(view_direction, normal);
117 |         z_alignment = pow(z_alignment, 8);
118 | 
119 |         // Calculate the depth difference
120 |         float linear_depth = FFX_DNSR_Shadows_GetLinearDepth(did, clip_space.z);   // get linear depth
121 |         // Previous depth is read at the texel that contains previous_uv and linearized the same way.
122 |         int2 idx = previous_uv * dims;
123 |         const float previous_depth = FFX_DNSR_Shadows_GetLinearDepth(idx, FFX_DNSR_Shadows_ReadPreviousDepth(idx));
124 |         const float depth_difference = abs(previous_depth - linear_depth) / linear_depth; // relative to the reprojected depth
125 | 
126 |         // Resolve into the disocclusion mask: the tolerance is 1% for z_alignment = 0 and 10% for z_alignment = 1
127 |         const float depth_tolerance = lerp(1e-2f, 1e-1f, z_alignment);
128 |         is_disoccluded = depth_difference >= depth_tolerance;
129 |     }
130 | 
131 |     return is_disoccluded;
132 | }
133 | 
134 | // Picks the velocity of the closest pixel among this lane and its quad neighbors:
135 | // first against the horizontal neighbor, then the winner against the vertical
136 | // neighbor's winner. "Closer" means larger depth if INVERTED_DEPTH_RANGE is defined,
137 | // smaller depth otherwise. The result is scaled by (0.5, -0.5), i.e. from ndc to uv.
138 | float2 FFX_DNSR_Shadows_GetClosestVelocity(int2 did, float depth)
139 | {
140 |     float2 best_velocity = FFX_DNSR_Shadows_ReadVelocity(did);
141 |     float best_depth = depth;
142 | 
143 |     // Horizontal neighbor
144 |     const float depth_x = QuadReadAcrossX(best_depth);
145 |     const float2 velocity_x = QuadReadAcrossX(best_velocity);
146 | #ifdef INVERTED_DEPTH_RANGE
147 |     const bool x_is_closer = depth_x > best_depth;
148 | #else
149 |     const bool x_is_closer = depth_x < best_depth;
150 | #endif
151 |     if (x_is_closer)
152 |     {
153 |         best_depth = depth_x;
154 |         best_velocity = velocity_x;
155 |     }
156 | 
157 |     // Vertical neighbor (which already holds the winner of its own horizontal comparison)
158 |     const float depth_y = QuadReadAcrossY(best_depth);
159 |     const float2 velocity_y = QuadReadAcrossY(best_velocity);
160 | #ifdef INVERTED_DEPTH_RANGE
161 |     const bool y_is_closer = depth_y > best_depth;
162 | #else
163 |     const bool y_is_closer = depth_y < best_depth;
164 | #endif
165 |     if (y_is_closer)
166 |     {
167 |         best_depth = depth_y;
168 |         best_velocity = velocity_y;
169 |     }
170 | 
171 |     const float2 ndc_to_uv = float2(0.5f, -0.5f);
172 |     return best_velocity * ndc_to_uv;
173 | }
165 | 
166 | #define KERNEL_RADIUS 8
167 | 
168 | // Returns the normalized weight of a kernel tap `i` pixels away from the center.
169 | // The unnormalized weight is exp(-3 * i^2 / (KERNEL_RADIUS + 1)^2); it is divided by
170 | // the sum over all 2 * KERNEL_RADIUS + 1 taps, so the weights of one full row sum to 1.
171 | float FFX_DNSR_Shadows_KernelWeight(float i)
172 | {
173 |     // The macro argument is parenthesized so that expression arguments (e.g. a - b)
174 |     // are squared as a whole.
175 | #define KERNEL_WEIGHT(x) (exp(-3.0 * float((x) * (x)) / ((KERNEL_RADIUS + 1.0) * (KERNEL_RADIUS + 1.0))))
176 | 
177 |     // Statically initialize kernel_weights_sum: center tap plus both halves of the kernel.
178 |     float kernel_weights_sum = 0;
179 |     kernel_weights_sum += KERNEL_WEIGHT(0);
180 |     for (int c = 1; c <= KERNEL_RADIUS; ++c)
181 |     {
182 |         kernel_weights_sum += 2 * KERNEL_WEIGHT(c); // Add other half of the kernel to the sum
183 |     }
184 |     float inv_kernel_weights_sum = rcp(kernel_weights_sum);
185 | 
186 |     // The only code here that depends on the runtime argument
187 |     return KERNEL_WEIGHT(i) * inv_kernel_weights_sum;
188 | }
183 | 
184 | // Adds value * weight to `moments`.
185 | // `value` comes from the horizontal neighborhood calculations. With one sample per
186 | // pixel it serves as both mean and variance, so a single accumulator is enough.
187 | void FFX_DNSR_Shadows_AccumulateMoments(float value, float weight, inout float moments)
188 | {
189 |     const float weighted_value = value * weight;
190 |     moments = moments + weighted_value;
191 | }
189 | 
190 | // The horizontal part of a 17x17 local neighborhood kernel: returns the kernel-weighted sum of the shadow mask bits of pixel `did` and of its 8 neighbors on either side in the same row (0 for rows outside the buffer).
191 | float FFX_DNSR_Shadows_HorizontalNeighborhood(int2 did)
192 | {
193 |    const int2 base_did = did;
194 | 
195 |     // Prevent vertical out of bounds access
196 |     if ((base_did.y < 0) || (base_did.y >= FFX_DNSR_Shadows_GetBufferDimensions().y)) return 0;
197 | 
198 |     const uint2 tile_index = FFX_DNSR_Shadows_GetTileIndexFromPixelPosition(base_did);
199 |     const uint linear_tile_index = FFX_DNSR_Shadows_LinearTileIndex(tile_index, FFX_DNSR_Shadows_GetBufferDimensions().x);
200 |     // Horizontally adjacent tiles are adjacent in the linear tile index.
201 |     const int left_tile_index = linear_tile_index - 1;
202 |     const int center_tile_index = linear_tile_index;
203 |     const int right_tile_index = linear_tile_index + 1;
204 | 
205 |     bool is_first_tile_in_row = tile_index.x == 0;
206 |     bool is_last_tile_in_row = tile_index.x == (FFX_DNSR_Shadows_RoundedDivide(FFX_DNSR_Shadows_GetBufferDimensions().x, 8) - 1);
207 |     // Tiles beyond the left or right end of the row are not read; their mask stays 0.
208 |     uint left_tile = 0;
209 |     if (!is_first_tile_in_row) left_tile = FFX_DNSR_Shadows_ReadRaytracedShadowMask(left_tile_index);
210 |     uint center_tile = FFX_DNSR_Shadows_ReadRaytracedShadowMask(center_tile_index);
211 |     uint right_tile = 0;
212 |     if (!is_last_tile_in_row) right_tile = FFX_DNSR_Shadows_ReadRaytracedShadowMask(right_tile_index);
213 | 
214 |     // Construct a single uint with the lowest 17bits containing the horizontal part of the local neighborhood.
215 | 
216 |     // First extract the 8 bits of our row in each of the neighboring tiles (row r of a tile mask starts at bit r * 8)
217 |     const uint row_base_index = (did.y % 4) * 8;
218 |     const uint left = (left_tile >> row_base_index) & 0xFF;
219 |     const uint center = (center_tile >> row_base_index) & 0xFF;
220 |     const uint right = (right_tile >> row_base_index) & 0xFF;
221 | 
222 |     // Combine them into a single mask containing [left, center, right] from least significant to most significant bit
223 |     uint neighborhood = left | (center << 8) | (right << 16);
224 | 
225 |     // Make sure our pixel is at bit position 9 (bit index 8) to get the highest contribution from the filter kernel
226 |     const uint bit_index_in_row = (did.x % 8);
227 |     neighborhood = neighborhood >> bit_index_in_row; // Shift out bits to the right, so the center bit ends up at bit 9.
228 | 
229 |     float moment = 0.0; // For one sample per pixel this is both, mean and variance
230 | 
231 |     // First 8 bits up to the center pixel: bit i is the neighbor 8 - i pixels before the center
232 |     uint mask;
233 |     int i;
234 |     for (i = 0; i < 8; ++i)
235 |     {
236 |         mask = 1u << i;
237 |         moment += (mask & neighborhood) ? FFX_DNSR_Shadows_KernelWeight(8 - i) : 0;
238 |     }
239 | 
240 |     // Center pixel
241 |     mask = 1u << 8;
242 |     moment += (mask & neighborhood) ? FFX_DNSR_Shadows_KernelWeight(0) : 0;
243 | 
244 |     // Last 8 bits: bit 8 + i is the neighbor i pixels after the center
245 |     for (i = 1; i <= 8; ++i)
246 |     {
247 |         mask = 1u << (8 + i);
248 |         moment += (mask & neighborhood) ? FFX_DNSR_Shadows_KernelWeight(i) : 0;
249 |     }
250 | 
251 |     return moment;
252 | }
253 | 
254 | // Horizontal sums shared between the threads of a group, indexed [gtid.x][row].
255 | // Each thread stores the rows 8 above, at and 8 below its pixel in rows gtid.y, gtid.y + 8 and gtid.y + 16.
256 | groupshared float g_FFX_DNSR_Shadows_neighborhood[8][24];
257 | 
258 | // Vertical part of the local neighborhood kernel: combines the horizontal sums of the
259 | // rows did.y - KERNEL_RADIUS .. did.y + KERNEL_RADIUS with the kernel weights.
260 | float FFX_DNSR_Shadows_ComputeLocalNeighborhood(int2 did, int2 gtid)
261 | {
262 |     const float row_above = FFX_DNSR_Shadows_HorizontalNeighborhood(int2(did.x, did.y - 8));
263 |     const float row_center = FFX_DNSR_Shadows_HorizontalNeighborhood(int2(did.x, did.y));
264 |     const float row_below = FFX_DNSR_Shadows_HorizontalNeighborhood(int2(did.x, did.y + 8));
265 | 
266 |     g_FFX_DNSR_Shadows_neighborhood[gtid.x][gtid.y] = row_above;
267 |     g_FFX_DNSR_Shadows_neighborhood[gtid.x][gtid.y + 8] = row_center;
268 |     g_FFX_DNSR_Shadows_neighborhood[gtid.x][gtid.y + 16] = row_below;
269 | 
270 |     GroupMemoryBarrierWithGroupSync();
271 | 
272 |     float weighted_sum = 0;
273 | 
274 |     // The thread's own three rows: the center and the two rows exactly KERNEL_RADIUS away.
275 |     FFX_DNSR_Shadows_AccumulateMoments(row_center, FFX_DNSR_Shadows_KernelWeight(0), weighted_sum);
276 |     FFX_DNSR_Shadows_AccumulateMoments(row_above, FFX_DNSR_Shadows_KernelWeight(KERNEL_RADIUS), weighted_sum);
277 |     FFX_DNSR_Shadows_AccumulateMoments(row_below, FFX_DNSR_Shadows_KernelWeight(KERNEL_RADIUS), weighted_sum);
278 | 
279 |     // The rows in between were computed by the other threads of this column.
280 |     for (int row_offset = 1; row_offset < KERNEL_RADIUS; ++row_offset)
281 |     {
282 |         const float value_above = g_FFX_DNSR_Shadows_neighborhood[gtid.x][8 + gtid.y - row_offset];
283 |         const float value_below = g_FFX_DNSR_Shadows_neighborhood[gtid.x][8 + gtid.y + row_offset];
284 |         const float row_weight = FFX_DNSR_Shadows_KernelWeight(row_offset);
285 | 
286 |         FFX_DNSR_Shadows_AccumulateMoments(value_above, row_weight, weighted_sum);
287 |         FFX_DNSR_Shadows_AccumulateMoments(value_below, row_weight, weighted_sum);
288 |     }
289 | 
290 |     return weighted_sum;
291 | }
288 | 
289 | // Writes the meta data word of tile `gid`. Only the thread with gtid == (0, 0) writes.
290 | void FFX_DNSR_Shadows_WriteTileMetaData(uint2 gid, uint2 gtid, bool is_cleared, bool all_in_light)
291 | {
292 |     const bool is_first_thread = all(gtid == 0);
293 |     if (is_first_thread)
294 |     {
295 |         uint tile_flags = 0;
296 |         if (all_in_light)
297 |         {
298 |             tile_flags |= TILE_META_DATA_LIGHT_MASK;
299 |         }
300 |         if (is_cleared)
301 |         {
302 |             tile_flags |= TILE_META_DATA_CLEAR_MASK;
303 |         }
304 | 
305 |         const uint tiles_per_row = FFX_DNSR_Shadows_RoundedDivide(FFX_DNSR_Shadows_GetBufferDimensions().x, 8);
306 |         FFX_DNSR_Shadows_WriteMetadata(gid.y * tiles_per_row + gid.x, tile_flags);
307 |     }
308 | }
299 | 
300 | // Marks the tile as cleared and writes constant outputs for pixel `did`:
301 | // reprojection result (shadow_value, 0) and moments (shadow_value, 0, sample count),
302 | // where the sample count is 1 for shadow receivers and 0 otherwise.
303 | void FFX_DNSR_Shadows_ClearTargets(uint2 did, uint2 gtid, uint2 gid, float shadow_value, bool is_shadow_receiver, bool all_in_light)
304 | {
305 |     FFX_DNSR_Shadows_WriteTileMetaData(gid, gtid, true, all_in_light);
306 | 
307 |     const float2 mean_and_variance = float2(shadow_value, 0);
308 |     FFX_DNSR_Shadows_WriteReprojectionResults(did, mean_and_variance);
309 | 
310 |     const float temporal_sample_count = is_shadow_receiver ? 1.0f : 0.0f;
311 |     const float3 cleared_moments = float3(shadow_value, 0, temporal_sample_count);
312 |     FFX_DNSR_Shadows_WriteMoments(did, cleared_moments);
313 | }
308 | // Per-thread entry point of the tile classification pass: clears tiles that need no denoising; for all other tiles it updates the temporal moments and writes the reprojected shadow value and variance.
309 | void FFX_DNSR_Shadows_TileClassification(uint group_index, uint2 gid)
310 | {
311 |     uint2 gtid = FFX_DNSR_Shadows_RemapLane8x8(group_index); // Make sure we can use the QuadReadAcross intrinsics to access a 2x2 region.
312 |     uint2 did = gid * 8 + gtid; // Pixel handled by this thread; each thread group covers 8x8 pixels.
313 |     // A tile in which no thread is a shadow receiver is cleared to 0 and skipped.
314 |     bool is_shadow_receiver = FFX_DNSR_Shadows_IsShadowReciever(did);
315 | 
316 |     bool skip_sky = FFX_DNSR_Shadows_ThreadGroupAllTrue(!is_shadow_receiver);
317 |     if (skip_sky)
318 |     {
319 |         // We have to set all resources of the tile we skipped to sensible values as neighboring active denoiser tiles might want to read them.
320 |         FFX_DNSR_Shadows_ClearTargets(did, gtid, gid, 0, is_shadow_receiver, false);
321 |         return;
322 |     }
323 |     // A tile whose whole surrounding mask region is uniformly lit or uniformly shadowed is cleared to that value and skipped.
324 |     bool all_in_light = false;
325 |     bool all_in_shadow = false;
326 |     FFX_DNSR_Shadows_SearchSpatialRegion(gid, all_in_light, all_in_shadow);
327 |     float shadow_value = all_in_light ? 1 : 0; // Only used if the tile is skipped below, in which case either all_in_light or all_in_shadow is true.
328 | 
329 |     bool can_skip = all_in_light || all_in_shadow;
330 |     // We have to append the entire tile if there is a single lane that we can't skip
331 |     bool skip_tile = FFX_DNSR_Shadows_ThreadGroupAllTrue(can_skip);
332 |     if (skip_tile)
333 |     {
334 |         // We have to set all resources of the tile we skipped to sensible values as neighboring active denoiser tiles might want to read them.
335 |         FFX_DNSR_Shadows_ClearTargets(did, gtid, gid, shadow_value, is_shadow_receiver, all_in_light);
336 |         return;
337 |     }
338 |     // From here on the tile is processed: its meta data is written with both flags off.
339 |     FFX_DNSR_Shadows_WriteTileMetaData(gid, gtid, false, false);
340 | 
341 |     float depth = FFX_DNSR_Shadows_ReadDepth(did);
342 |     const float2 velocity = FFX_DNSR_Shadows_GetClosestVelocity(did.xy, depth); // Must happen before we deactivate lanes
343 |     const float local_neighborhood = FFX_DNSR_Shadows_ComputeLocalNeighborhood(did, gtid);
344 |     // Position of this pixel in the history: current uv minus velocity.
345 |     const float2 texel_size = FFX_DNSR_Shadows_GetInvBufferDimensions();
346 |     const float2 uv = (did.xy + 0.5f) * texel_size;
347 |     const float2 history_uv = uv - velocity;
348 |     const int2 history_pos = history_uv * FFX_DNSR_Shadows_GetBufferDimensions();
349 | 
350 |     const uint2 tile_index = FFX_DNSR_Shadows_GetTileIndexFromPixelPosition(did);
351 |     const uint linear_tile_index = FFX_DNSR_Shadows_LinearTileIndex(tile_index, FFX_DNSR_Shadows_GetBufferDimensions().x);
352 | 
353 |     const uint shadow_tile = FFX_DNSR_Shadows_ReadRaytracedShadowMask(linear_tile_index);
354 | 
355 |     float3 moments_current = 0;
356 |     float variance = 0;
357 |     float shadow_clamped = 0;
358 |     if (is_shadow_receiver) // do not process sky pixels
359 |     {
360 |         bool hit_light = shadow_tile & FFX_DNSR_Shadows_GetBitMaskFromPixelPosition(did);
361 |         const float shadow_current = hit_light ? 1.0 : 0.0;
362 | 
363 |         // Perform moments and variance calculations
364 |         {
365 |             bool is_disoccluded = FFX_DNSR_Shadows_IsDisoccluded(did, depth, velocity);
366 |             const float3 previous_moments = is_disoccluded ? float3(0.0f, 0.0f, 0.0f) // Can't trust previous moments on disocclusion
367 |                 : FFX_DNSR_Shadows_ReadPreviousMomentsBuffer(history_pos);
368 |             // Online (Welford-style) update of the running mean (m) and the sum of squared differences (s) with the new sample.
369 |             const float old_m = previous_moments.x;
370 |             const float old_s = previous_moments.y;
371 |             const float sample_count = previous_moments.z + 1.0f;
372 |             const float new_m = old_m + (shadow_current - old_m) / sample_count;
373 |             const float new_s = old_s + (shadow_current - old_m) * (shadow_current - new_m);
374 |             // Sample variance s / (n - 1); with a single sample the variance is set to 1.
375 |             variance = (sample_count > 1.0f ? new_s / (sample_count - 1.0f) : 1.0f);
376 |             moments_current = float3(new_m, new_s, sample_count);
377 |         }
378 | 
379 |         // Retrieve local neighborhood and reproject
380 |         {
381 |             float mean = local_neighborhood;
382 |             float spatial_variance = local_neighborhood;
383 |             // The same value is used as first and second moment, so the spatial variance is mean - mean^2 (clamped at 0).
384 |             spatial_variance = max(spatial_variance - mean * mean, 0.0f);
385 | 
386 |             // Compute the clamping bounding box: mean +/- half a standard deviation
387 |             const float std_deviation = sqrt(spatial_variance);
388 |             const float nmin = mean - 0.5f * std_deviation;
389 |             const float nmax = mean + 0.5f * std_deviation;
390 | 
391 |             // Clamp reprojected sample to local neighborhood (on the first frame the current sample stands in for the history)
392 |             float shadow_previous = shadow_current;
393 |             if (FFX_DNSR_Shadows_IsFirstFrame() == 0)
394 |             {
395 |                 shadow_previous = FFX_DNSR_Shadows_ReadHistory(history_uv);
396 |             }
397 | 
398 |             shadow_clamped = clamp(shadow_previous, nmin, nmax);
399 | 
400 |             // Reduce history weighting: the sample count is damped by exp(-d^2 / sigma), where d is the distance of the history value from the mean in units of half a standard deviation (at least 0.001)
401 |             float const sigma = 20.0f;
402 |             float const temporal_discontinuity = (shadow_previous - mean) / max(0.5f * std_deviation, 0.001f);
403 |             float const sample_counter_damper = exp(-temporal_discontinuity * temporal_discontinuity / sigma);
404 |             moments_current.z *= sample_counter_damper;
405 | 
406 |             // Boost variance on first frames (fewer than 16 accumulated samples)
407 |             if (moments_current.z < 16.0f)
408 |             {
409 |                 const float variance_boost = max(16.0f - moments_current.z, 1.0f);
410 |                 variance = max(variance, spatial_variance);
411 |                 variance *= variance_boost;
412 |             }
413 |         }
414 | 
415 |         // Perform the temporal blend: the weight of the current sample falls from 1 (sample count 0) to 0.05 (sample count 8 or more)
416 |         const float history_weight = sqrt(max(8.0f - moments_current.z, 0.0f) / 8.0f);
417 |         shadow_clamped = lerp(shadow_clamped, shadow_current, lerp(0.05f, 1.0f, history_weight));
418 |     }
419 | 
420 |     // Output the results of the temporal pass
421 |     FFX_DNSR_Shadows_WriteReprojectionResults(did.xy, float2(shadow_clamped, variance));
422 |     FFX_DNSR_Shadows_WriteMoments(did.xy, moments_current);
423 | }
424 | 
425 | #endif
426 | 


--------------------------------------------------------------------------------
/ffx-shadows-dnsr/ffx_denoiser_shadows_util.h:
--------------------------------------------------------------------------------
 1 | /**********************************************************************
 2 | Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
 3 | 
 4 | Permission is hereby granted, free of charge, to any person obtaining a copy
 5 | of this software and associated documentation files (the "Software"), to deal
 6 | in the Software without restriction, including without limitation the rights
 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 8 | copies of the Software, and to permit persons to whom the Software is
 9 | furnished to do so, subject to the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | ********************************************************************/
22 | 
23 | #ifndef FFX_DNSR_SHADOWS_UTILS_HLSL
24 | #define FFX_DNSR_SHADOWS_UTILS_HLSL
25 | 
26 | uint FFX_DNSR_Shadows_RoundedDivide(uint value, uint divisor) // Divides value by divisor and rounds the quotient up.
27 | { // NOTE(review): divisor == 0 and value + divisor - 1 wrapping past 32 bits are not guarded against - confirm callers stay in range.
28 |     return (value + divisor - 1) / divisor; // Adding (divisor - 1) first makes any non-zero remainder raise the quotient by one.
29 | }
30 | 
31 | uint2 FFX_DNSR_Shadows_GetTileIndexFromPixelPosition(uint2 pixel_pos) // Returns the 2D index of the tile that contains pixel_pos.
32 | {
33 |     return uint2(pixel_pos.x / 8, pixel_pos.y / 4); // A tile covers 8 pixels in x and 4 in y; the unsigned division rounds down.
34 | }
35 | 
36 | uint FFX_DNSR_Shadows_LinearTileIndex(uint2 tile_index, uint screen_width) // Flattens a 2D tile index into a row-major linear index.
37 | { // presumably screen_width is given in pixels - TODO confirm against callers.
38 |     return tile_index.y * FFX_DNSR_Shadows_RoundedDivide(screen_width, 8) + tile_index.x; // Row stride is screen_width / 8 rounded up, so a partially covered last tile column still gets a slot.
39 | }
40 | 
41 | uint FFX_DNSR_Shadows_GetBitMaskFromPixelPosition(uint2 pixel_pos) // Returns a mask in which only the bit belonging to pixel_pos's slot is set.
42 | {
43 |     int lane_index = (pixel_pos.y % 4) * 8 + (pixel_pos.x % 8); // Row-major slot in an 8-wide, 4-high group: (y mod 4) picks the row, (x mod 8) the column, so the value is always in [0, 31].
44 |     return (1u << lane_index); // One bit per slot; 32 slots fit a single 32-bit mask.
45 | }
46 | 
47 | #define TILE_META_DATA_CLEAR_MASK 0b01u // Bit 0 flag. NOTE(review): presumably tested against a per-tile meta data value; its meaning is set by the passes that use it.
48 | #define TILE_META_DATA_LIGHT_MASK 0b10u // Bit 1 flag. NOTE(review): presumably tested against a per-tile meta data value; its meaning is set by the passes that use it.
49 | 
50 | // From ffx_a.h
51 | 
52 | uint FFX_DNSR_Shadows_BitfieldExtract(uint src, uint off, uint bits) { uint mask = (1u << bits) - 1u; return (src >> off) & mask; } // ABfe: returns `bits` bits of src starting at bit `off`, right-aligned. The mask is built with unsigned literals so bits == 31 never shifts into a sign bit. Assumes bits < 32 - TODO confirm.
53 | uint FFX_DNSR_Shadows_BitfieldInsert(uint src, uint ins, uint bits) { uint mask = (1u << bits) - 1u; return (ins & mask) | (src & (~mask)); } // ABfiM: returns src with its low `bits` bits replaced by the low `bits` bits of ins. The mask is built with unsigned literals so bits == 31 never shifts into a sign bit. Assumes bits < 32 - TODO confirm.
54 | 
55 | //  LANE TO 8x8 MAPPING (each entry is the hex lane index that lands on that cell; x grows to the right, y grows downwards)
56 | //  ===================
57 | //  00 01 08 09 10 11 18 19 
58 | //  02 03 0a 0b 12 13 1a 1b
59 | //  04 05 0c 0d 14 15 1c 1d
60 | //  06 07 0e 0f 16 17 1e 1f 
61 | //  20 21 28 29 30 31 38 39 
62 | //  22 23 2a 2b 32 33 3a 3b
63 | //  24 25 2c 2d 34 35 3c 3d
64 | //  26 27 2e 2f 36 37 3e 3f 
65 | uint2 FFX_DNSR_Shadows_RemapLane8x8(uint lane) { // Converts a lane index into its (x, y) cell of the 8x8 layout shown in the table above; only lane bits 0..5 are used.
66 |     return uint2(FFX_DNSR_Shadows_BitfieldInsert(FFX_DNSR_Shadows_BitfieldExtract(lane, 2u, 3u), lane, 1u) // x = lane bits {4, 3, 0}: take bits 4..2, then overwrite the lowest with lane bit 0.
67 |         , FFX_DNSR_Shadows_BitfieldInsert(FFX_DNSR_Shadows_BitfieldExtract(lane, 3u, 3u) // y = lane bits {5, 2, 1}: take bits 5..3 as the base value ...
68 |             , FFX_DNSR_Shadows_BitfieldExtract(lane, 1u, 2u), 2u)); // ... then overwrite its two low bits with lane bits 2..1.
69 | }
70 | 
71 | #endif
72 | 


--------------------------------------------------------------------------------