├── .gitignore
├── Core.meta
├── Core
    ├── AlexMalyutin.CMAA2.asmdef
    ├── AlexMalyutin.CMAA2.asmdef.meta
    ├── AtomicTextureHandle.cs
    ├── AtomicTextureHandle.cs.meta
    ├── CMAA2.Compute.cs
    ├── CMAA2.Compute.cs.meta
    ├── CMAA2.compute
    ├── CMAA2.compute.meta
    ├── CMAA2.hlsl
    ├── CMAA2.hlsl.meta
    ├── CMAA2RenderFeature.cs
    ├── CMAA2RenderFeature.cs.meta
    ├── CMAA2RenderPass.cs
    └── CMAA2RenderPass.cs.meta
├── LICENSE
├── LICENSE.meta
├── README.md
├── README.md.meta
├── THIRD_PARTY_LICENSES.meta
├── THIRD_PARTY_LICENSES
    ├── CMAA2-LICENSE
    └── CMAA2-LICENSE.meta
├── package.json
└── package.json.meta


/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | 


--------------------------------------------------------------------------------
/Core.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: 6c085ef84026a41fbbe4d7a371d986bd
3 | folderAsset: yes
4 | DefaultImporter:
5 |   externalObjects: {}
6 |   userData: 
7 |   assetBundleName: 
8 |   assetBundleVariant: 
9 | 


--------------------------------------------------------------------------------
/Core/AlexMalyutin.CMAA2.asmdef:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "AlexMalyutin.CMAA2",
 3 |     "rootNamespace": "CMAA2",
 4 |     "references": [
 5 |         "Unity.RenderPipelines.Core.Runtime",
 6 |         "Unity.RenderPipelines.Universal.Runtime"
 7 |     ],
 8 |     "includePlatforms": [],
 9 |     "excludePlatforms": [],
10 |     "allowUnsafeCode": false,
11 |     "overrideReferences": false,
12 |     "precompiledReferences": [],
13 |     "autoReferenced": true,
14 |     "defineConstraints": [],
15 |     "versionDefines": [],
16 |     "noEngineReferences": false
17 | }


--------------------------------------------------------------------------------
/Core/AlexMalyutin.CMAA2.asmdef.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: d7288b684d76f4d00bd8f2e0236e5496
3 | AssemblyDefinitionImporter:
4 |   externalObjects: {}
5 |   userData: 
6 |   assetBundleName: 
7 |   assetBundleVariant: 
8 | 


--------------------------------------------------------------------------------
/Core/AtomicTextureHandle.cs:
--------------------------------------------------------------------------------
 1 | #define DONT_USE_TEXTURE_ATOMICS
 2 | #if PLATFORM_STANDALONE_OSX || DONT_USE_TEXTURE_ATOMICS
 3 | #define TEXTURE_ATOMICS_NOT_SUPPORTED
 4 | #endif
 5 | 
 6 | using UnityEngine;
 7 | using UnityEngine.Experimental.Rendering;
 8 | using UnityEngine.Rendering;
 9 | using UnityEngine.Rendering.RenderGraphModule;
10 | 
11 | namespace CMAA2.Core
12 | {
13 |     /// <summary>Transient per-pixel uint storage, backed by a structured buffer or by a random-write texture.</summary>
14 |     public struct AtomicTextureHandle
15 |     {
16 |         public Vector4 Size => new Vector4(Width, Height); // x = Width, y = Height; z and w stay 0
17 | 
18 |         public int Width;
19 |         public int Height;
20 | #if TEXTURE_ATOMICS_NOT_SUPPORTED
21 |         public BufferHandle Handle;
22 | #else
23 |         public TextureHandle Handle;
24 | #endif
25 | 
26 |         /// <summary>Creates the width x height transient resource and declares read/write access on the builder.</summary>
27 |         public static AtomicTextureHandle CreateTransientUint(IBaseRenderGraphBuilder builder, int width, int height)
28 |         {
29 |             var handle = new AtomicTextureHandle()
30 |             {
31 |                 Width = width,
32 |                 Height = height,
33 |             };
34 | 
35 | #if TEXTURE_ATOMICS_NOT_SUPPORTED
36 |             var desc = new BufferDesc(width * height, sizeof(uint), GraphicsBuffer.Target.Structured);
37 |             handle.Handle = builder.CreateTransientBuffer(desc);
38 |             builder.UseBuffer(handle.Handle, AccessFlags.ReadWrite);
39 | #else
40 |             // 32-bit texels: same element size as the sizeof(uint) buffer path; an 8-bit format cannot hold those values.
41 |             var desc = new TextureDesc(width, height) { format = GraphicsFormat.R32_UInt, enableRandomWrite = true };
42 |             handle.Handle = builder.CreateTransientTexture(desc);
43 |             builder.UseTexture(handle.Handle, AccessFlags.ReadWrite);
44 | #endif
45 |             return handle;
46 |         }
47 | 
48 |         /// <summary>Binds the resource to the shader property <paramref name="name"/> of the given kernel.</summary>
49 |         public void Bind(IComputeCommandBuffer cmd, ComputeShader compute, int kernelIndex, string name)
50 |         {
51 | #if TEXTURE_ATOMICS_NOT_SUPPORTED
52 |             cmd.SetComputeBufferParam(compute, kernelIndex, name, Handle);
53 | #else
54 |             cmd.SetComputeTextureParam(compute, kernelIndex, name, Handle);
55 | #endif
56 |         }
57 |     }
58 | }
59 | 


--------------------------------------------------------------------------------
/Core/AtomicTextureHandle.cs.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: afadf6a2eb36480a9f9c5a7c1e2c99fd
3 | timeCreated: 1748460785


--------------------------------------------------------------------------------
/Core/CMAA2.Compute.cs:
--------------------------------------------------------------------------------
  1 | using UnityEngine;
  2 | using UnityEngine.Rendering;
  3 | using UnityEngine.Rendering.RenderGraphModule;
  4 | 
  5 | namespace CMAA2.Core
  6 | {
  7 |     public class CMAA2Compute // Records the four CMAA2 kernels of one ComputeShader into a compute command buffer.
  8 |     {
  9 |         private readonly ComputeShader _compute;
 10 |         private readonly int _edgesColor2x2CS;
 11 |         private readonly int _computeDispatchArgsCS;
 12 |         private readonly int _processCandidatesCS;
 13 |         private readonly int _deferredColorApply2x2CS;
 14 | 
 15 |         private readonly ThreadGroupSizes _edgesColor2x2ThreadGroupSize;
 16 | 
 17 |         /// <summary>Resolves the kernel indices in <paramref name="compute"/> and caches the EdgesColor2x2CS group size.</summary>
 18 |         /// <exception cref="System.ArgumentNullException"><paramref name="compute"/> is null.</exception>
 19 |         public CMAA2Compute(ComputeShader compute)
 20 |         {
 21 |             if (compute == null)
 22 |             {
 23 |                 // Fail early with the parameter name instead of a NullReferenceException from FindKernel.
 24 |                 throw new System.ArgumentNullException(nameof(compute));
 25 |             }
 26 | 
 27 |             _compute = compute;
 28 |             _edgesColor2x2CS = _compute.FindKernel("EdgesColor2x2CS");
 29 |             _compute.GetKernelThreadGroupSizes(_edgesColor2x2CS, out var x, out var y, out var z);
 30 |             _edgesColor2x2ThreadGroupSize = new ThreadGroupSizes(x, y, z);
 31 |             _computeDispatchArgsCS = _compute.FindKernel("ComputeDispatchArgsCS");
 32 |             _processCandidatesCS = _compute.FindKernel("ProcessCandidatesCS");
 33 |             _deferredColorApply2x2CS = _compute.FindKernel("DeferredColorApply2x2CS");
 34 |         }
 35 | 
 36 |         /// <summary>Binds the EdgesColor2x2CS resources and dispatches enough groups to cover <paramref name="textureResolution"/>.</summary>
 37 |         public void EdgesColor2x2CS(
 38 |             IComputeCommandBuffer cmd,
 39 |             TextureHandle inColorTexture,
 40 |             Vector2Int textureResolution,
 41 |             TextureHandle workingEdges,
 42 |             SizedBufferHandle workingShapeCandidates,
 43 |             AtomicTextureHandle workingDeferredBlendItemListHeads,
 44 |             BufferHandle workingControlBuffer
 45 |         )
 46 |         {
 47 |             var kernelId = _edgesColor2x2CS;
 48 |             var sampleName = nameof(EdgesColor2x2CS);
 49 | 
 50 |             cmd.BeginSample(sampleName);
 51 | 
 52 |             Bind(cmd, kernelId, "g_inoutColorReadonly", inColorTexture);
 53 |             Bind(cmd, kernelId, "g_workingEdges", workingEdges);
 54 | 
 55 |             Bind(cmd, "g_workingShapeCandidates_Dim", workingShapeCandidates.Dimensions);
 56 |             Bind(cmd, kernelId, "g_workingShapeCandidates", workingShapeCandidates.Buffer);
 57 | 
 58 |             Bind(cmd, kernelId, "g_workingControlBuffer", workingControlBuffer);
 59 | 
 60 |             Bind(cmd, "g_workingDeferredBlendItemListHeads_Width", workingDeferredBlendItemListHeads.Width);
 61 |             workingDeferredBlendItemListHeads.Bind(cmd, _compute, kernelId, "g_workingDeferredBlendItemListHeads");
 62 | 
 63 |             // Ceiling division: each group advances (groupSize - 2) * 2 pixels per axis; the "- 2" mirrors
 64 |             // CMAA2_CS_OUTPUT_KERNEL_SIZE_X/Y in CMAA2.hlsl.
 65 |             int csOutputKernelSizeX = (int)(_edgesColor2x2ThreadGroupSize.X - 2);
 66 |             int csOutputKernelSizeY = (int)(_edgesColor2x2ThreadGroupSize.Y - 2);
 67 |             int threadGroupCountX = (textureResolution.x + csOutputKernelSizeX * 2 - 1) / (csOutputKernelSizeX * 2);
 68 |             int threadGroupCountY = (textureResolution.y + csOutputKernelSizeY * 2 - 1) / (csOutputKernelSizeY * 2);
 69 |             cmd.DispatchCompute(_compute, kernelId, threadGroupCountX, threadGroupCountY, 1);
 70 | 
 71 |             cmd.EndSample(sampleName);
 72 |         }
 73 | 
 74 |         /// <summary>Binds the ComputeDispatchArgsCS resources and dispatches the given number of thread groups.</summary>
 75 |         public void ComputeDispatchArgsCS(
 76 |             IComputeCommandBuffer cmd,
 77 |             int threadGroupsX,
 78 |             int threadGroupsY,
 79 |             BufferHandle workingControlBuffer,
 80 |             SizedBufferHandle workingDeferredBlendLocationList,
 81 |             SizedBufferHandle workingShapeCandidates,
 82 |             BufferHandle workingExecuteIndirectBuffer
 83 |         )
 84 |         {
 85 |             int kernelId = _computeDispatchArgsCS;
 86 |             var sampleName = nameof(ComputeDispatchArgsCS);
 87 | 
 88 |             cmd.BeginSample(sampleName);
 89 | 
 90 |             Bind(cmd, kernelId, "g_workingControlBuffer", workingControlBuffer);
 91 | 
 92 |             // TODO: Remove passing unnecessary vectors!
 93 |             Bind(cmd, "g_workingDeferredBlendLocationList_Dim", workingDeferredBlendLocationList.Dimensions);
 94 |             Bind(cmd, kernelId, "g_workingDeferredBlendLocationList", workingDeferredBlendLocationList.Buffer);
 95 | 
 96 |             // TODO: Remove passing unnecessary vectors!
 97 |             Bind(cmd, "g_workingShapeCandidates_Dim", workingShapeCandidates.Dimensions);
 98 |             Bind(cmd, kernelId, "g_workingShapeCandidates", workingShapeCandidates.Buffer);
 99 | 
100 |             // Out
101 |             Bind(cmd, kernelId, "g_workingExecuteIndirectBuffer", workingExecuteIndirectBuffer);
102 | 
103 |             // TODO: ThreadGroups count!
104 |             cmd.DispatchCompute(_compute, kernelId, threadGroupsX, threadGroupsY, 1);
105 | 
106 |             cmd.EndSample(sampleName);
107 |         }
108 | 
109 |         // Shader-side types (CMAA2.hlsl): inColor is Texture2D<float4>, workingEdges is RWTexture2D<uint>,
110 |         // workingShapeCandidates and workingDeferredBlendLocationList are RWStructuredBuffer<uint>; the list heads are
111 |         // RWStructuredBuffer<uint> when PLATFORM_NO_TEXTURE_ATOMICS is defined and RWTexture2D<uint> otherwise.
112 |         /// <summary>Binds the ProcessCandidatesCS resources and dispatches it indirectly from <paramref name="workingExecuteDirectBuffer"/>.</summary>
113 |         public void ProcessCandidatesCS(
114 |             IComputeCommandBuffer cmd,
115 |             BufferHandle workingExecuteDirectBuffer,
116 |             TextureHandle inColor,
117 |             TextureHandle workingEdges,
118 |             AtomicTextureHandle workingDeferredBlendItemListHeads,
119 |             BufferHandle workingControlBuffer,
120 |             BufferHandle workingDeferredBlendItemList,
121 |             SizedBufferHandle workingShapeCandidates,
122 |             SizedBufferHandle workingDeferredBlendLocationList
123 |         )
124 |         {
125 |             int kernelId = _processCandidatesCS;
126 |             var sampleName = nameof(ProcessCandidatesCS);
127 | 
128 |             cmd.BeginSample(sampleName);
129 | 
130 |             Bind(cmd, kernelId, "g_inoutColorReadonly", inColor);
131 |             Bind(cmd, kernelId, "g_workingEdges", workingEdges);
132 | 
133 |             // NOTE: The width is only needed on platforms that don't support atomic operations on textures.
134 |             Bind(cmd, "g_workingDeferredBlendItemListHeads_Width", workingDeferredBlendItemListHeads.Width);
135 |             workingDeferredBlendItemListHeads.Bind(cmd, _compute, kernelId, "g_workingDeferredBlendItemListHeads");
136 | 
137 |             Bind(cmd, kernelId, "g_workingControlBuffer", workingControlBuffer);
138 |             Bind(cmd, kernelId, "g_workingDeferredBlendItemList", workingDeferredBlendItemList);
139 | 
140 |             Bind(cmd, "g_workingDeferredBlendLocationList_Dim", workingDeferredBlendLocationList.Dimensions);
141 |             Bind(cmd, kernelId, "g_workingDeferredBlendLocationList", workingDeferredBlendLocationList.Buffer);
142 | 
143 |             Bind(cmd, "g_workingShapeCandidates_Dim", workingShapeCandidates.Dimensions);
144 |             Bind(cmd, kernelId, "g_workingShapeCandidates", workingShapeCandidates.Buffer);
145 | 
146 |             // Indirect dispatch: the thread group counts are read from the buffer, starting at offset 0.
147 |             cmd.DispatchCompute(_compute, kernelId, workingExecuteDirectBuffer, 0);
148 |             cmd.EndSample(sampleName);
149 |         }
150 | 
151 |         /// <summary>Binds the DeferredColorApply2x2CS resources and dispatches it indirectly from <paramref name="workingExecuteIndirectBuffer"/>.</summary>
152 |         public void DeferredColorApply2x2CS(
153 |             IComputeCommandBuffer cmd,
154 |             BufferHandle workingExecuteIndirectBuffer,
155 |             TextureHandle outColor,
156 |             BufferHandle workingControlBuffer,
157 |             BufferHandle workingDeferredBlendItemList,
158 |             AtomicTextureHandle workingDeferredBlendItemListHeads,
159 |             SizedBufferHandle workingDeferredBlendLocationList
160 |         )
161 |         {
162 |             var kernelId = _deferredColorApply2x2CS;
163 |             var sampleName = nameof(DeferredColorApply2x2CS);
164 | 
165 |             cmd.BeginSample(sampleName);
166 | 
167 |             Bind(cmd, kernelId, "g_inoutColorWriteonly", outColor);
168 |             Bind(cmd, kernelId, "g_workingControlBuffer", workingControlBuffer);
169 |             Bind(cmd, kernelId, "g_workingDeferredBlendItemList", workingDeferredBlendItemList);
170 | 
171 |             // NOTE: The width is only needed on platforms that don't support atomic operations on textures.
172 |             Bind(cmd, "g_workingDeferredBlendItemListHeads_Width", workingDeferredBlendItemListHeads.Width);
173 |             workingDeferredBlendItemListHeads.Bind(cmd, _compute, kernelId, "g_workingDeferredBlendItemListHeads");
174 | 
175 |             Bind(cmd, "g_workingDeferredBlendLocationList_Dim", workingDeferredBlendLocationList.Dimensions);
176 |             Bind(cmd, kernelId, "g_workingDeferredBlendLocationList", workingDeferredBlendLocationList.Buffer);
177 | 
178 |             cmd.DispatchCompute(_compute, kernelId, workingExecuteIndirectBuffer, 0);
179 | 
180 |             cmd.EndSample(sampleName);
181 |         }
182 | 
183 |         // Thin wrappers that always target _compute; int/vector values are set per shader, resources per kernel.
184 |         private void Bind(IComputeCommandBuffer cmd, string name, int value)
185 |             => cmd.SetComputeIntParam(_compute, name, value);
186 | 
187 |         private void Bind(IComputeCommandBuffer cmd, string name, Vector4 vector)
188 |             => cmd.SetComputeVectorParam(_compute, name, vector);
189 | 
190 |         private void Bind(IComputeCommandBuffer cmd, int kernelId, string name, TextureHandle textureHandle)
191 |             => cmd.SetComputeTextureParam(_compute, kernelId, name, textureHandle);
192 | 
193 |         private void Bind(IComputeCommandBuffer cmd, int kernelId, string name, BufferHandle bufferHandle)
194 |             => cmd.SetComputeBufferParam(_compute, kernelId, name, bufferHandle);
195 |     }
196 | 
197 |     struct ThreadGroupSizes // Thread counts of a compute kernel's thread group, one value per axis.
198 |     {
199 |         public uint X;
200 |         public uint Y;
201 |         public uint Z;
202 | 
203 |         public ThreadGroupSizes(uint x, uint y, uint z)
204 |         {
205 |             (X, Y, Z) = (x, y, z);
206 |         }
207 |     }
208 | }
209 | 


--------------------------------------------------------------------------------
/Core/CMAA2.Compute.cs.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: f389d29ec0554aa7880ce3989dbc3f66
3 | timeCreated: 1748371512


--------------------------------------------------------------------------------
/Core/CMAA2.compute:
--------------------------------------------------------------------------------
 1 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 2 | // Copyright 2025, Alex Malyutin
 3 | // 
 4 | // Licensed under the Apache License, Version 2.0 (the "License");
 5 | // you may not use this file except in compliance with the License.
 6 | // You may obtain a copy of the License at
 7 | //
 8 | //     http://www.apache.org/licenses/LICENSE-2.0
 9 | //
10 | // Unless required by applicable law or agreed to in writing, software
11 | // distributed under the License is distributed on an "AS IS" BASIS,
12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | // See the License for the specific language governing permissions and
14 | // limitations under the License.
15 | //
16 | // This file is a Unity-compatible port of Intel’s CMAA2 (Conservative Morphological Anti-Aliasing)
17 | // originally developed and distributed by Intel Corporation under the Apache 2.0 license.
18 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
19 | 
20 | #if defined(SHADER_API_METAL) || defined(SHADER_API_GLES)
21 | #define PLATFORM_NO_TEXTURE_LOAD_OFFSET
22 | #endif
23 | 
24 | // AtomicTextureHandle.cs defines DONT_USE_TEXTURE_ATOMICS unconditionally, so the C# side always binds
25 | // g_workingDeferredBlendItemListHeads as a structured buffer. Select the buffer declaration on every graphics API
26 | // (not only Metal/GLES/D3D11) so the shader-side resource type always matches what is bound.
27 | #define PLATFORM_NO_TEXTURE_ATOMICS
28 | 
29 | #define CMAA2_UAV_STORE_TYPED               1   // use typed UAV store
30 | #define CMAA2_UAV_STORE_CONVERT_TO_SRGB     0   // no need to convert to SRGB - R11G11B10_FLOAT does not use SRGB encoding
31 | #define CMAA2_UAV_STORE_TYPED_UNORM_FLOAT   0   // not required for non-float semantics correctness (RWTexture2D<float4>)
32 | 
33 | #include "CMAA2.hlsl"
34 | 
35 | #pragma kernel EdgesColor2x2CS
36 | #pragma kernel ComputeDispatchArgsCS
37 | #pragma kernel ProcessCandidatesCS
38 | #pragma kernel DeferredColorApply2x2CS
39 | 


--------------------------------------------------------------------------------
/Core/CMAA2.compute.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: bdca705f137f466380668861f6afe032
3 | timeCreated: 1748370540


--------------------------------------------------------------------------------
/Core/CMAA2.hlsl:
--------------------------------------------------------------------------------
   1 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
   2 | // Copyright (c) 2018, Intel Corporation
   3 | //
   4 | // Licensed under the Apache License, Version 2.0 ( the "License" );
   5 | // you may not use this file except in compliance with the License.
   6 | // You may obtain a copy of the License at
   7 | // 
   8 | // http://www.apache.org/licenses/LICENSE-2.0
   9 | // 
  10 | // Unless required by applicable law or agreed to in writing, software
  11 | // distributed under the License is distributed on an "AS IS" BASIS,
  12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 | // See the License for the specific language governing permissions and
  14 | // limitations under the License.
  15 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  16 | //
  17 | // Conservative Morphological Anti-Aliasing, version: 2.3
  18 | //
  19 | // Author(s):       Filip Strugar (filip.strugar@intel.com)
  20 | //
  21 | // More info:       https://github.com/GameTechDev/CMAA2
  22 | //
  23 | // Please see https://github.com/GameTechDev/CMAA2/README.md for additional information and a basic integration guide.
  24 | //
  25 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  26 | 
  27 | #ifndef __CMAA2_HLSL__
  28 | #define __CMAA2_HLSL__
  29 | 
  30 | // this line is VA framework specific (ignore/remove when using outside of VA)
  31 | #ifdef VA_COMPILED_AS_SHADER_CODE
  32 | #include "MagicMacrosMagicFile.h"
  33 | #endif
  34 | 
  35 | // Constants that C++/API side needs to know!
  36 | #define CMAA_PACK_SINGLE_SAMPLE_EDGE_TO_HALF_WIDTH  1   // adds more ALU but reduces memory use for edges by half by packing two 4 bit edge info into one R8_UINT texel - helps on all HW except at really low res
  37 | #define CMAA2_CS_INPUT_KERNEL_SIZE_X                16
  38 | #define CMAA2_CS_INPUT_KERNEL_SIZE_Y                16
  39 | 
  40 | // The rest below is shader only code
  41 | #ifndef __cplusplus
  42 | 
  43 | // If the color buffer range is bigger than [0, 1] then use this, otherwise don't (and gain some precision - see https://bartwronski.com/2017/04/02/small-float-formats-r11g11b10f-precision/)
  44 | #ifndef CMAA2_SUPPORT_HDR_COLOR_RANGE
  45 | #define CMAA2_SUPPORT_HDR_COLOR_RANGE 0
  46 | #endif
  47 | 
  48 | // 0 is full color-based edge detection, 1 and 2 are identical log luma based, with the difference being that 1 loads color and computes log luma in-place (less efficient) while 2 loads precomputed log luma from a separate R8_UNORM texture (more efficient).
  49 | // Luma-based edge detection has a slightly lower quality but better performance so use it as a default; providing luma as a separate texture (or .a channel of the main one) will improve performance.
  50 | // See RGBToLumaForEdges for luma conversions in non-HDR and HDR versions.
  51 | #ifndef CMAA2_EDGE_DETECTION_LUMA_PATH
  52 | #define CMAA2_EDGE_DETECTION_LUMA_PATH 1
  53 | #endif
  54 | 
  55 | // for CMAA2+MSAA support
  56 | #ifndef CMAA_MSAA_SAMPLE_COUNT
  57 | #define CMAA_MSAA_SAMPLE_COUNT 1
  58 | #endif
  59 | 
  60 | #define CMAA2_CS_OUTPUT_KERNEL_SIZE_X               (CMAA2_CS_INPUT_KERNEL_SIZE_X-2)
  61 | #define CMAA2_CS_OUTPUT_KERNEL_SIZE_Y               (CMAA2_CS_INPUT_KERNEL_SIZE_Y-2)
  62 | #define CMAA2_PROCESS_CANDIDATES_NUM_THREADS        128
  63 | #define CMAA2_DEFERRED_APPLY_NUM_THREADS            32
  64 | 
  65 | // Optimization paths
  66 | #define CMAA2_DEFERRED_APPLY_THREADGROUP_SWAP       1   // 1 seems to be better or same on all HW
  67 | #define CMAA2_COLLECT_EXPAND_BLEND_ITEMS            1   // this reschedules final part of work in the ProcessCandidatesCS (where the sampling and blending takes place) from few to all threads to increase hardware thread occupancy
  68 | #ifndef CMAA2_USE_HALF_FLOAT_PRECISION                  
  69 | #define CMAA2_USE_HALF_FLOAT_PRECISION              0   // use half precision by default? (not on by default due to driver issues on various different hardware, but let external code decide to define if needed)
  70 | #endif
  71 | 
  72 | #ifndef CMAA2_UAV_STORE_TYPED
  73 | #error Warning - make sure correct value is set according to D3D11_FORMAT_SUPPORT_TYPED_UNORDERED_ACCESS_VIEW & D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE caps for the color UAV format used in g_inoutColorWriteonly
  74 | #define CMAA2_UAV_STORE_TYPED                       1   // use defaults that match the most common scenario: DXGI_FORMAT_R8G8B8A8_UNORM as UAV on a DXGI_FORMAT_R8G8B8A8_UNORM_SRGB resource (no typed stores for sRGB so we have to manually convert)
  75 | #endif
  76 | 
  77 | #ifndef CMAA2_UAV_STORE_CONVERT_TO_SRGB
  78 | #error Warning - make sure correct value is set according to whether manual linear->sRGB color conversion is needed when writing color output to g_inoutColorWriteonly
  79 | #define CMAA2_UAV_STORE_CONVERT_TO_SRGB             1   // use defaults that match the most common scenario: DXGI_FORMAT_R8G8B8A8_UNORM as UAV on a DXGI_FORMAT_R8G8B8A8_UNORM_SRGB resource (no typed stores for sRGB so we have to manually convert)
  80 | #endif
  81 | 
  82 | #ifndef CMAA2_UAV_STORE_TYPED_UNORM_FLOAT
  83 | #error Warning - make sure correct value is set according to the color UAV format used in g_inoutColorWriteonly
  84 | #define CMAA2_UAV_STORE_TYPED_UNORM_FLOAT           1   // for typed UAV stores: set to 1 for all _UNORM formats and to 0 for _FLOAT formats
  85 | #endif
  86 | 
  87 | #if CMAA2_UAV_STORE_TYPED
  88 |     #ifndef CMAA2_UAV_STORE_TYPED_UNORM_FLOAT
  89 |         #error When CMAA2_UAV_STORE_TYPED is set to 1, CMAA2_UAV_STORE_TYPED_UNORM_FLOAT must be set 1 if the color UAV is not a _FLOAT format or 0 if it is.
  90 |     #endif
  91 | #else
  92 |     #ifndef CMAA2_UAV_STORE_UNTYPED_FORMAT
  93 |         #error Error - untyped format required (see FinalUAVStore function for the list)
  94 |     #endif
  95 | #endif
  96 | 
  97 | #if (CMAA2_USE_HALF_FLOAT_PRECISION != 0)
  98 | #error this codepath needs testing - it's likely not valid anymore
  99 | typedef min16float      lpfloat;
 100 | typedef min16float2     lpfloat2;
 101 | typedef min16float3     lpfloat3;
 102 | typedef min16float4     lpfloat4;
 103 | #else
 104 | typedef float           lpfloat;
 105 | typedef float2          lpfloat2;
 106 | typedef float3          lpfloat3;
 107 | typedef float4          lpfloat4;
 108 | #endif
 109 | 
 110 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 111 | // VARIOUS QUALITY SETTINGS
 112 | //
 113 | // Longest line search distance; must be even number; for high perf low quality start from ~32 - the bigger the number, 
 114 | // the nicer the gradients but more costly. Max supported is 128!
 115 | static const uint c_maxLineLength = 86;
 116 | // 
 117 | #ifndef CMAA2_EXTRA_SHARPNESS
 118 |     #define CMAA2_EXTRA_SHARPNESS                   0     // Set to 1 to preserve even more text and shape clarity at the expense of less AA
 119 | #endif
 120 | //
 121 | // It makes sense to slightly drop edge detection thresholds with increase in MSAA sample count, as with the higher
 122 | // MSAA level the overall impact of CMAA2 alone is reduced but the cost increases.
 123 | #define CMAA2_SCALE_QUALITY_WITH_MSAA               0
 124 | //
 125 | // 
 126 | #ifndef CMAA2_STATIC_QUALITY_PRESET
 127 |     #define CMAA2_STATIC_QUALITY_PRESET 2  // 0 - LOW, 1 - MEDIUM, 2 - HIGH, 3 - ULTRA
 128 | #endif
 129 | // presets (for HDR color buffer maybe use higher values)
 130 | #if CMAA2_STATIC_QUALITY_PRESET == 0   // LOW
 131 |     #define g_CMAA2_EdgeThreshold                   lpfloat(0.15)
 132 | #elif CMAA2_STATIC_QUALITY_PRESET == 1 // MEDIUM
 133 |     #define g_CMAA2_EdgeThreshold                   lpfloat(0.10)
 134 | #elif CMAA2_STATIC_QUALITY_PRESET == 2 // HIGH (default)
 135 |     #define g_CMAA2_EdgeThreshold                   lpfloat(0.07)
 136 | #elif CMAA2_STATIC_QUALITY_PRESET == 3 // ULTRA
 137 |     #define g_CMAA2_EdgeThreshold                   lpfloat(0.05)
 138 | #else
 139 |     #error CMAA2_STATIC_QUALITY_PRESET not set?
 140 | #endif
 141 | // 
 142 | #if CMAA2_EXTRA_SHARPNESS
 143 | #define g_CMAA2_LocalContrastAdaptationAmount       lpfloat(0.15)
 144 | #define g_CMAA2_SimpleShapeBlurinessAmount          lpfloat(0.07)
 145 | #else
 146 | #define g_CMAA2_LocalContrastAdaptationAmount       lpfloat(0.10)
 147 | #define g_CMAA2_SimpleShapeBlurinessAmount          lpfloat(0.10)
 148 | #endif
 149 | // 
 150 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 151 | 
 152 | 
 153 | #if CMAA_MSAA_SAMPLE_COUNT > 1
 154 | #define CMAA_MSAA_USE_COMPLEXITY_MASK 1
 155 | #endif
 156 | 
 157 | #if CMAA2_EDGE_DETECTION_LUMA_PATH == 2 || CMAA2_EDGE_DETECTION_LUMA_PATH == 3 || CMAA_MSAA_USE_COMPLEXITY_MASK
 158 | SamplerState                    g_gather_point_clamp_Sampler        : register( s0 );       // there's also a slightly less efficient codepath that avoids Gather for easier porting
 159 | #endif
 160 | 
 161 | // Is the output UAV format R32_UINT for manual shader packing, or a supported UAV store format?
 162 | #if CMAA2_UAV_STORE_TYPED
 163 | #if CMAA2_UAV_STORE_TYPED_UNORM_FLOAT
 164 | RWTexture2D<unorm float4>       g_inoutColorWriteonly               : register( u0 );       // final output color
 165 | #else
 166 | RWTexture2D<lpfloat4>           g_inoutColorWriteonly               : register( u0 );       // final output color
 167 | #endif
 168 | #else
 169 | RWTexture2D<uint>               g_inoutColorWriteonly               : register( u0 );       // final output color
 170 | #endif
 171 | 
 172 | #if CMAA2_EDGE_UNORM
 173 | RWTexture2D<unorm float>        g_workingEdges                      : register( u1 );       // output edges (only used in the first pass)
 174 | #else
 175 | RWTexture2D<uint>               g_workingEdges                      : register( u1 );       // output edges (only used in the first pass)
 176 | #endif
 177 | 
 178 | RWStructuredBuffer<uint>        g_workingShapeCandidates            : register( u2 );
 179 | RWStructuredBuffer<uint>        g_workingDeferredBlendLocationList  : register( u3 );
 180 | RWStructuredBuffer<uint2>       g_workingDeferredBlendItemList      : register( u4 );       // 
 181 | #if !defined(PLATFORM_NO_TEXTURE_ATOMICS)
 182 | RWTexture2D<uint>               g_workingDeferredBlendItemListHeads : register( u5 );
 183 | #else // NOTE: Metal doesn't support texture atomics! Using StructuredBuffer instead.
 184 | RWStructuredBuffer<uint>        g_workingDeferredBlendItemListHeads : register( u5 );
 185 | uint                            g_workingDeferredBlendItemListHeads_Width;                  // Width to calc flat index
 186 | #endif
 187 | RWByteAddressBuffer             g_workingControlBuffer              : register( u6 );
 188 | RWByteAddressBuffer             g_workingExecuteIndirectBuffer      : register( u7 );
 189 | 
 190 | #if CMAA_MSAA_SAMPLE_COUNT > 1
 191 | Texture2DArray<lpfloat4>        g_inColorMSReadonly                 : register( t2 );       // input MS color
 192 | Texture2D<lpfloat>              g_inColorMSComplexityMaskReadonly   : register( t1 );       // input MS color control surface
 193 | #else
 194 | Texture2D<lpfloat4>             g_inoutColorReadonly                : register( t0 );       // input color
 195 | #endif
 196 | 
 197 | #if CMAA2_EDGE_DETECTION_LUMA_PATH == 2
 198 | Texture2D<float>                g_inLumaReadonly                    : register( t3 );
 199 | #endif
 200 | 
 201 | #if defined(PLATFORM_NO_TEXTURE_ATOMICS) // sizes come from a <buffer>_Dim uniform: .x is read as count, .y as stride
 202 | #define BUFFER_DIMENSION(buffer) float2 buffer##_Dim
 203 | #define GET_BUFFER_DIMENSIONS(buffer, count, stride) \
 204 |     count = buffer##_Dim.x; \
 205 |     stride = buffer##_Dim.y;
 206 | #else // sizes are queried from the buffer object itself, so BUFFER_DIMENSION declares nothing
 207 | #define BUFFER_DIMENSION(buffer)
 208 | #define GET_BUFFER_DIMENSIONS(buffer, count, stride) buffer.GetDimensions(count, stride)
 209 | #endif
 210 | // Declares the _Dim uniforms for these two buffers (expands to an empty statement in the GetDimensions path).
 211 | BUFFER_DIMENSION(g_workingShapeCandidates);
 212 | BUFFER_DIMENSION(g_workingDeferredBlendLocationList);
 213 | 
 214 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 215 | // encoding/decoding of various data such as edges
 216 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 217 | // how .rgba channels from the edge texture maps to pixel edges:
 218 | //
 219 | //                   A - 0x08               (A - there's an edge between us and a pixel above us)
 220 | //              |---------|                 (R - there's an edge between us and a pixel to the right)
 221 | //              |         |                 (G - there's an edge between us and a pixel at the bottom)
 222 | //     0x04 - B |  pixel  | R - 0x01        (B - there's an edge between us and a pixel to the left)
 223 | //              |         |
 224 | //              |_________|
 225 | //                   G - 0x02
 226 | uint PackEdges( lpfloat4 edges )   // every component of 'edges' must be exactly 0 or 1
 227 | {
 228 |     return (uint)dot( lpfloat4( 1, 2, 4, 8 ), edges );  // bit weights: R -> 0x01, G -> 0x02, B -> 0x04, A -> 0x08
 229 | }
 230 | uint4 UnpackEdges( uint value )
 231 | {
 232 |     // Inverse of PackEdges: each component becomes 1 when its edge bit is set and 0 otherwise.
 233 |     const bool hasRight  = ( value & 0x01 ) != 0;
 234 |     const bool hasBottom = ( value & 0x02 ) != 0;
 235 |     const bool hasLeft   = ( value & 0x04 ) != 0;
 236 |     const bool hasTop    = ( value & 0x08 ) != 0;
 237 |     return uint4( hasRight, hasBottom, hasLeft, hasTop );
 238 | }
 239 | lpfloat4 UnpackEdgesFlt( uint value )
 240 | {
 241 |     // Decodes the four edge bits into 0.0 / 1.0 components: .x = 0x01, .y = 0x02, .z = 0x04, .w = 0x08.
 242 |     const bool hasRight  = ( value & 0x01 ) != 0;
 243 |     const bool hasBottom = ( value & 0x02 ) != 0;
 244 |     const bool hasLeft   = ( value & 0x04 ) != 0;
 245 |     const bool hasTop    = ( value & 0x08 ) != 0;
 246 |     return lpfloat4( hasRight, hasBottom, hasLeft, hasTop );
 247 | }
 248 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 249 | 
 250 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 251 | // source color & color conversion helpers
 252 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 253 | 
 254 | 
 255 | lpfloat3 LoadSourceColor( uint2 pixelPos, int2 offset, int sampleIndex )   // loads .rgb of the source colour at pixelPos + offset; sampleIndex is only used on the MSAA path
 256 | {
 257 | #if CMAA_MSAA_SAMPLE_COUNT > 1   // MSAA input: sampleIndex is passed as the third load coordinate
 258 |     lpfloat3 color = g_inColorMSReadonly.Load( int4( pixelPos, sampleIndex, 0 ), offset ).rgb;
 259 | #else
 260 | #if !defined(PLATFORM_NO_TEXTURE_LOAD_OFFSET)   // use the Load overload that takes a separate texel offset
 261 |     lpfloat3 color = g_inoutColorReadonly.Load( int3( pixelPos, 0 ), offset ).rgb;
 262 | #else   // no offset overload on this platform: add the offset to the coordinate instead
 263 |     lpfloat3 color = g_inoutColorReadonly.Load( int3( pixelPos + offset, 0 ) ).rgb;
 264 | #endif
 265 | #endif
 266 |     return color;
 267 | }
 268 | //
 269 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 270 | // (R11G11B10 conversion code below taken from Miniengine's PixelPacking_R11G11B10.hlsli,  
 271 | // Copyright (c) Microsoft, MIT license, Developed by Minigraph, Author:  James Stanard; original file link:
 272 | // https://github.com/Microsoft/DirectX-Graphics-Samples/blob/master/MiniEngine/Core/Shaders/PixelPacking_R11G11B10.hlsli )
 273 | //
 274 | // The standard 32-bit HDR color format.  Each float has a 5-bit exponent and no sign bit.
 275 | uint Pack_R11G11B10_FLOAT( float3 rgb )   // packs rgb into one uint: r in bits 0-10, g in bits 11-21, b in bits 22-31
 276 | {
 277 |     // Clamp upper bound so that it doesn't accidentally round up to INF 
 278 |     // Exponent=15, Mantissa=1.11111
 279 |     rgb = min(rgb, asfloat(0x477C0000));  
 280 |     uint r = ((f32tof16(rgb.x) + 8) >> 4) & 0x000007FF;   // +8 rounds to nearest before the low 4 half-float bits are dropped
 281 |     uint g = ((f32tof16(rgb.y) + 8) << 7) & 0x003FF800;   // same rounding, field shifted up to bit 11
 282 |     uint b = ((f32tof16(rgb.z) + 16) << 17) & 0xFFC00000;   // blue keeps one bit less (10 bits), hence +16
 283 |     return r | g | b;
 284 | }
 285 | 
 286 | float3 Unpack_R11G11B10_FLOAT( uint rgb )   // inverse of Pack_R11G11B10_FLOAT
 287 | {
 288 |     float r = f16tof32((rgb << 4 ) & 0x7FF0);   // each field is moved back to half-float bit positions 4-14; the sign bit stays clear
 289 |     float g = f16tof32((rgb >> 7 ) & 0x7FF0);
 290 |     float b = f16tof32((rgb >> 17) & 0x7FE0);   // blue was stored with one mantissa bit less (bits 5-14)
 291 |     return float3(r, g, b);
 292 | }
 293 | //
 294 | // These next two encodings are great for LDR data.  By knowing that our values are [0.0, 1.0]
 295 | // (or [0.0, 2.0), incidentally), we can reduce how many bits we need in the exponent.  We can
 296 | // immediately eliminate all positive exponents.  By giving more bits to the mantissa, we can
 297 | // improve precision at the expense of range.  The 8E3 format goes one bit further, quadrupling
 298 | // mantissa precision but increasing smallest exponent from -14 to -6.  The smallest value of 8E3
 299 | // is 2^-14, while the smallest value of 7E4 is 2^-21.  Both are smaller than the smallest 8-bit
 300 | // sRGB value, which is close to 2^-12.
 301 | //
 302 | // This is like R11G11B10_FLOAT except that it moves one bit from each exponent to each mantissa.
 303 | uint Pack_R11G11B10_E4_FLOAT( float3 rgb )   // packs rgb (clamped to [0, 2)) into one uint: r in bits 0-10, g in bits 11-21, b in bits 22-31
 304 | {
 305 |     // Clamp to [0.0, 2.0).  The magic number is 1.FFFFF x 2^0.  (We can't represent hex floats in HLSL.)
 306 |     // This trick works because clamping your exponent to 0 reduces the number of bits needed by 1.
 307 |     rgb = clamp( rgb, 0.0, asfloat(0x3FFFFFFF) );
 308 |     uint r = ((f32tof16(rgb.r) + 4) >> 3 ) & 0x000007FF;   // only 3 low half-float bits are dropped here, rounded with +4
 309 |     uint g = ((f32tof16(rgb.g) + 4) << 8 ) & 0x003FF800;   // same rounding, field shifted up to bit 11
 310 |     uint b = ((f32tof16(rgb.b) + 8) << 18) & 0xFFC00000;   // blue keeps one bit less (10 bits), hence +8
 311 |     return r | g | b;
 312 | }
 313 | //
 314 | float3 Unpack_R11G11B10_E4_FLOAT( uint rgb )   // inverse of Pack_R11G11B10_E4_FLOAT
 315 | {
 316 |     float r = f16tof32((rgb << 3 ) & 0x3FF8);   // fields return to half-float bits 3-13; the top exponent bit and the sign bit stay clear
 317 |     float g = f16tof32((rgb >> 8 ) & 0x3FF8);
 318 |     float b = f16tof32((rgb >> 18) & 0x3FF0);   // blue was stored with one mantissa bit less (bits 4-13)
 319 |     return float3(r, g, b);
 320 | }
 321 | //
 322 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 323 | // This is for temporary storage - R11G11B10_E4 covers 8bit per channel sRGB well enough; 
 324 | // For HDR range (CMAA2_SUPPORT_HDR_COLOR_RANGE) use standard float packing - not using it by default because it's not precise 
 325 | // enough to match sRGB 8bit, but in a HDR scenario we simply need the range.
 326 | // For even more precision in LDR try E3 version and there are other options for HDR range (see above 
 327 | // PixelPacking_R11G11B10.hlsli link for a number of excellent options).
 328 | // It's worth noting that since CMAA2 works on high contrast edges, the lack of precision will not be nearly as
 329 | // noticeable as it would be on gradients (which always remain unaffected).
 330 | lpfloat3 InternalUnpackColor( uint packedColor )   // decodes a colour produced by InternalPackColor; the format is chosen by CMAA2_SUPPORT_HDR_COLOR_RANGE
 331 | {
 332 | #if CMAA2_SUPPORT_HDR_COLOR_RANGE
 333 |     // 32bit packing is best for performance reasons, but there might be precision issues with it.
 334 |     // TODO(review): the original note ended with an unfinished "look into" - the intended follow-up is not stated.
 335 |     return Unpack_R11G11B10_FLOAT( packedColor );
 336 | #else   // LDR: the E4 variant trades range for mantissa precision
 337 |     return Unpack_R11G11B10_E4_FLOAT( packedColor );
 338 | #endif
 339 | }
 340 | //
 341 | uint InternalPackColor( lpfloat3 color )   // encodes a colour into 32 bits for temporary storage; must stay paired with InternalUnpackColor
 342 | {
 343 | #if CMAA2_SUPPORT_HDR_COLOR_RANGE   // HDR: standard R11G11B10 float packing, chosen for its range
 344 |     return Pack_R11G11B10_FLOAT( color );
 345 | #else   // LDR: E4 variant, input gets clamped to [0, 2)
 346 |     return Pack_R11G11B10_E4_FLOAT( color );
 347 | #endif
 348 | }
 349 | //
 350 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 351 | //
 352 | void StoreColorSample( uint2 pixelPos, lpfloat3 color, bool isComplexShape, uint msaaSampleIndex )   // pushes a packed colour onto the deferred-blend linked list of the 2x2 quad containing pixelPos
 353 | {
 354 |     uint counterIndex;  g_workingControlBuffer.InterlockedAdd( 4*12, 1, counterIndex );   // atomically reserve the next free slot in the item list
 355 | 
 356 |     // quad coordinates
 357 |     uint2 quadPos       = pixelPos / uint2( 2, 2 );
 358 |     // position of the pixel inside its 2x2 quad (0..3, row-major)
 359 |     uint offsetXY       = (pixelPos.y % 2) * 2 + (pixelPos.x % 2);
 360 |     // encode item-specific info: {2 bits for 2x2 quad location}, {3 bits for MSAA sample index}, {1 bit for isComplexShape flag}, {26 bits left for address (index)}
 361 |     uint header         = ( offsetXY << 30 ) | ( msaaSampleIndex << 27 ) | ( isComplexShape << 26 );
 362 | 
 363 |     uint counterIndexWithHeader = counterIndex | header;   // NOTE(review): relies on counterIndex fitting in the low 26 bits - there is no range check here
 364 | 
 365 |     uint originalIndex;
 366 | #if !defined(PLATFORM_NO_TEXTURE_ATOMICS)   // list heads are indexed directly by the 2D quad position
 367 |     InterlockedExchange( g_workingDeferredBlendItemListHeads[ quadPos ], counterIndexWithHeader, originalIndex );
 368 | #else   // list heads are stored linearly: flatten the quad position using the row width
 369 |     uint quadPosFlat = quadPos.x + quadPos.y * g_workingDeferredBlendItemListHeads_Width;
 370 |     InterlockedExchange( g_workingDeferredBlendItemListHeads[ quadPosFlat ], counterIndexWithHeader, originalIndex );
 371 | #endif
 372 |     g_workingDeferredBlendItemList[counterIndex] = uint2( originalIndex, InternalPackColor( color ) );   // .x links to the previous head, .y holds the packed colour
 373 | 
 374 |     // First one added?
 375 |     if( originalIndex == 0xFFFFFFFF )   // 0xFFFFFFFF is the value the list heads are cleared to, i.e. the list was empty
 376 |     {
 377 |         // Make a list of all edge pixels - these cover all potential pixels where AA is applied.
 378 |         uint edgeListCounter;  g_workingControlBuffer.InterlockedAdd( 4*8, 1, edgeListCounter );
 379 |         g_workingDeferredBlendLocationList[edgeListCounter] = (quadPos.x << 16) | quadPos.y;   // quad x in the high 16 bits, quad y in the low 16 bits
 380 |     }
 381 | }
 382 | //
 383 | #if CMAA2_COLLECT_EXPAND_BLEND_ITEMS   // group-shared staging storage, only declared when this option is enabled
 384 | #define CMAA2_BLEND_ITEM_SLM_SIZE           768         // there's a fallback for extreme cases (observed with this value set to 256 or below) in which case image will remain correct but performance will suffer
 385 | groupshared uint        g_groupSharedBlendItemCount;   // presumably the number of valid entries in g_groupSharedBlendItems - its use is outside this section, confirm there
 386 | groupshared uint2       g_groupSharedBlendItems[ CMAA2_BLEND_ITEM_SLM_SIZE ];   // fixed-capacity array of uint2 items shared by the thread group
 387 | #endif
 388 | //
 389 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 390 | // Untyped UAV store packing & sRGB conversion helpers
 391 | //
 392 | lpfloat LINEAR_to_SRGB( lpfloat val )   // linear -> sRGB transfer curve for a single channel
 393 | {
 394 |     // below the cutoff the curve is a straight line
 395 |     if( val < 0.0031308 )
 396 |         return val * lpfloat( 12.92 );
 397 |     // everything else goes through the 1/2.4 power segment
 398 |     return lpfloat( 1.055 ) * pow( abs( val ), lpfloat( 1.0 ) / lpfloat( 2.4 ) ) - lpfloat( 0.055 );
 399 | }
 400 | lpfloat3 LINEAR_to_SRGB( lpfloat3 val )   // three-channel overload: applies the scalar LINEAR_to_SRGB to x, y and z independently
 401 | {
 402 |     return lpfloat3( LINEAR_to_SRGB( val.x ), LINEAR_to_SRGB( val.y ), LINEAR_to_SRGB( val.z ) );
 403 | }
 404 | //
 405 | uint FLOAT4_to_R8G8B8A8_UNORM( lpfloat4 unpackedInput )   // packs four [0,1] channels into 8 bits each: x in bits 0-7, y in 8-15, z in 16-23, w in 24-31
 406 | {
 407 |     return (( uint( saturate( unpackedInput.x ) * 255 + 0.5 ) ) |   // saturate clamps to [0,1]; +0.5 rounds to nearest before the truncating cast
 408 |             ( uint( saturate( unpackedInput.y ) * 255 + 0.5 ) << 8 ) |
 409 |             ( uint( saturate( unpackedInput.z ) * 255 + 0.5 ) << 16 ) |
 410 |             ( uint( saturate( unpackedInput.w ) * 255 + 0.5 ) << 24 ) );
 411 | }
 412 | //
 413 | uint FLOAT4_to_R10G10B10A2_UNORM( lpfloat4 unpackedInput )   // packs x, y, z into 10 bits each (bits 0-9, 10-19, 20-29) and w into the top 2 bits
 414 | {
 415 |     return (( uint( saturate( unpackedInput.x ) * 1023 + 0.5    ) ) |   // saturate clamps to [0,1]; +0.5 rounds to nearest before the truncating cast
 416 |             ( uint( saturate( unpackedInput.y ) * 1023 + 0.5    ) << 10 ) |
 417 |             ( uint( saturate( unpackedInput.z ) * 1023 + 0.5    ) << 20 ) |
 418 |             ( uint( saturate( unpackedInput.w ) * 3 + 0.5       ) << 30 ) );   // alpha only has 4 levels (0..3)
 419 | }
 420 | //
 421 | // This handles various permutations for various formats with no/partial/full typed UAV store support
 422 | void FinalUAVStore( uint2 pixelPos, lpfloat3 color )   // writes color to g_inoutColorWriteonly at pixelPos; the alpha channel is always written as 0
 423 | {
 424 | #if CMAA2_UAV_STORE_CONVERT_TO_SRGB   // apply the sRGB curve in the shader before storing
 425 |     color = LINEAR_to_SRGB( color ) ;
 426 | #endif
 427 | 
 428 | #if CMAA2_UAV_STORE_TYPED   // typed store: the four-component value is written as is
 429 |     g_inoutColorWriteonly[ pixelPos ] = lpfloat4( color.rgb, 0 );
 430 | #else   // untyped store: the colour is packed by hand into a single uint
 431 |     #if CMAA2_UAV_STORE_UNTYPED_FORMAT == 1     // R8G8B8A8_UNORM (or R8G8B8A8_UNORM_SRGB with CMAA2_UAV_STORE_CONVERT_TO_SRGB)
 432 |         g_inoutColorWriteonly[ pixelPos ] = FLOAT4_to_R8G8B8A8_UNORM( lpfloat4( color, 0 ) );
 433 |     #elif CMAA2_UAV_STORE_UNTYPED_FORMAT == 2   // R10G10B10A2_UNORM (or R10G10B10A2_UNORM_SRGB with CMAA2_UAV_STORE_CONVERT_TO_SRGB)
 434 |         g_inoutColorWriteonly[ pixelPos ] = FLOAT4_to_R10G10B10A2_UNORM( lpfloat4( color, 0 ) );
 435 |     #else   // any other format value is a build error until a packer is added
 436 |         #error CMAA color packing format not defined - add it here!
 437 |     #endif
 438 | #endif
 439 | }
 440 | //
 441 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 442 | 
 443 | 
 444 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 445 | // Edge detection and local contrast adaptation helpers
 446 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 447 | //
 448 | lpfloat GetActualEdgeThreshold( )   // returns the edge threshold used by the comparisons: g_CMAA2_EdgeThreshold, optionally scaled for MSAA
 449 | {
 450 |     lpfloat retVal = g_CMAA2_EdgeThreshold;
 451 | #if CMAA2_SCALE_QUALITY_WITH_MSAA
 452 |     retVal *= 1.0 + (CMAA_MSAA_SAMPLE_COUNT-1) * 0.06;   // raise the threshold by 6% for every MSAA sample beyond the first
 453 | #endif
 454 |     return retVal;
 455 | }
 456 | //
 457 | lpfloat EdgeDetectColorCalcDiff( lpfloat3 colorA, lpfloat3 colorB )   // weighted sum of the per-channel absolute differences of two colours
 458 | {
 459 |     const lpfloat3 channelWeights = lpfloat3( 0.299, 0.587, 0.114 );   // r, g, b contribution to the result
 460 |     const lpfloat3 channelDelta   = abs( colorA - colorB );            // per-channel distance
 461 |     return dot( channelDelta, channelWeights );
 462 | }
 463 | //
 464 | // apply custom curve / processing to put input color (linear) in the format required by ComputeEdge
 465 | lpfloat3 ProcessColorForEdgeDetect( lpfloat3 color )   // maps a linear colour onto the curve ComputeEdge expects
 466 | {
 467 |     //return LINEAR_to_SRGB( color );                      // correct reference
 468 |     //return pow( max( 0, color ), 1.0 / 2.4 );            // approximate sRGB curve
 469 |     return sqrt( color ); // just very roughly approximate sRGB curve
 470 | }
 471 | //
 472 | lpfloat2 ComputeEdge( int x, int y, lpfloat3 pixelColors[3 * 3 - 1] )   // .x = difference with the right neighbour, .y = difference with the neighbour one row down
 473 | {
 474 |     const int center      = x + y * 3;   // flat index into the row-major, 3-wide pixel window
 475 |     const lpfloat toRight = EdgeDetectColorCalcDiff( pixelColors[center], pixelColors[center + 1] );
 476 |     const lpfloat toBelow = EdgeDetectColorCalcDiff( pixelColors[center], pixelColors[center + 3] );
 477 |     return lpfloat2( toRight, toBelow );    // for HDR edge detection it might be good to premultiply both of these by some factor - otherwise clamping to 1 might prevent some local contrast adaptation. It's a very minor nitpick though, unlikely to significantly affect things.
 478 | }
 479 | // color -> luma-for-edges conversion (sqrt-based in the active path; the log-luma variant is compiled out below)
 480 | float RGBToLumaForEdges( float3 linearRGB )
 481 | {
 482 | #if 0   // disabled alternative, kept for reference
 483 |     // this matches Miniengine luma path
 484 |     float Luma = dot( linearRGB, float3(0.212671, 0.715160, 0.072169) );
 485 |     return log2(1 + Luma * 15) / 4;
 486 | #else
 487 |     // this is what original FXAA (and consequently CMAA2) use by default - these coefficients correspond to Rec. 601 and those should be
 488 |     // used on gamma-compressed components (see https://en.wikipedia.org/wiki/Luma_(video)#Rec._601_luma_versus_Rec._709_luma_coefficients).
 489 |     float luma = dot( sqrt( linearRGB.rgb ), float3( 0.299, 0.587, 0.114 ) );  // http://en.wikipedia.org/wiki/CCIR_601
 490 |     // using sqrt luma for now but log luma like in miniengine provides a nicer curve on the low-end
 491 |     return luma;
 492 | #endif
 493 | }
 494 | lpfloat2 ComputeEdgeLuma( int x, int y, lpfloat pixelLumas[3 * 3 - 1] )   // .x = luma difference with the right neighbour, .y = with the neighbour one row down
 495 | {
 496 |     const int center      = x + y * 3;   // flat index into the row-major, 3-wide luma window
 497 |     const lpfloat toRight = abs( pixelLumas[center] - pixelLumas[center + 1] );
 498 |     const lpfloat toBelow = abs( pixelLumas[center] - pixelLumas[center + 3] );
 499 |     return lpfloat2( toRight, toBelow );    // for HDR edge detection it might be good to premultiply both of these by some factor - otherwise clamping to 1 might prevent some local contrast adaptation. It's a very minor nitpick though, unlikely to significantly affect things.
 500 | }
 501 | //
 502 | lpfloat ComputeLocalContrastV( int x, int y, in lpfloat2 neighbourhood[4][4] )   // largest .y among neighbourhood[x+1..x+2][y..y+1], scaled by g_CMAA2_LocalContrastAdaptationAmount
 503 | {
 504 |     // new, small kernel 4-connecting-edges-only local contrast adaptation
 505 |     return max( max( neighbourhood[x + 1][y + 0].y, neighbourhood[x + 1][y + 1].y ), max( neighbourhood[x + 2][y + 0].y, neighbourhood[x + 2][y + 1].y ) ) * lpfloat( g_CMAA2_LocalContrastAdaptationAmount );
 506 | 
 507 | //    // slightly bigger kernel that enhances edges in-line (not worth the cost)
 508 | //  return ( max( max( neighbourhood[x + 1][y + 0].y, neighbourhood[x + 1][y + 1].y ), max( neighbourhood[x + 2][y + 0].y, neighbourhood[x + 2][y + 1].y ) ) 
 509 | //        - ( neighbourhood[x + 1][y + 0].x + neighbourhood[x + 1][y + 2].x ) * 0.3 ) * lpfloat( g_CMAA2_LocalContrastAdaptationAmount );
 510 | }
 511 | //
 512 | lpfloat ComputeLocalContrastH( int x, int y, in lpfloat2 neighbourhood[4][4] )   // largest .x among neighbourhood[x..x+1][y+1..y+2], scaled by g_CMAA2_LocalContrastAdaptationAmount
 513 | {
 514 |     // new, small kernel 4-connecting-edges-only local contrast adaptation
 515 |     return max( max( neighbourhood[x + 0][y + 1].x, neighbourhood[x + 1][y + 1].x ), max( neighbourhood[x + 0][y + 2].x, neighbourhood[x + 1][y + 2].x ) ) * lpfloat( g_CMAA2_LocalContrastAdaptationAmount );
 516 | 
 517 | //    // slightly bigger kernel that enhances edges in-line (not worth the cost)
 518 | //    return ( max( max( neighbourhood[x + 0][y + 1].x, neighbourhood[x + 1][y + 1].x ), max( neighbourhood[x + 0][y + 2].x, neighbourhood[x + 1][y + 2].x ) ) 
 519 | //        - ( neighbourhood[x + 0][y + 1].y + neighbourhood[x + 2][y + 1].y ) * 0.3 ) * lpfloat( g_CMAA2_LocalContrastAdaptationAmount );
 520 | }
 521 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 522 | 
 523 | lpfloat4 ComputeSimpleShapeBlendValues( lpfloat4 edges, lpfloat4 edgesLeft, lpfloat4 edgesRight, lpfloat4 edgesTop, lpfloat4 edgesBottom, uniform bool dontTestShapeValidity )   // returns blend weights ordered (left, above, right, below) for the centre pixel from its own edges and those of its 4 neighbours
 524 | {
 525 |     // a 3x3 kernel for higher quality handling of L-based shapes (still rather basic and conservative)
 526 | 
 527 |     lpfloat fromRight   = edges.r;
 528 |     lpfloat fromBelow   = edges.g;
 529 |     lpfloat fromLeft    = edges.b;
 530 |     lpfloat fromAbove   = edges.a;
 531 | 
 532 |     lpfloat blurCoeff = lpfloat( g_CMAA2_SimpleShapeBlurinessAmount );
 533 | 
 534 |     lpfloat numberOfEdges = dot( edges, lpfloat4( 1, 1, 1, 1 ) );   // edge count of the centre pixel (inputs are 0/1 flags)
 535 | 
 536 |     lpfloat numberOfEdgesAllAround = dot(edgesLeft.bga + edgesRight.rga + edgesTop.rba + edgesBottom.rgb, lpfloat3( 1, 1, 1 ) );   // each neighbour contributes its three edges that are not shared with the centre pixel
 537 | 
 538 |     // skip if already tested for before calling this function
 539 |     if( !dontTestShapeValidity )
 540 |     {
 541 |         // No blur for straight edge
 542 |         if( numberOfEdges == 1 )
 543 |             blurCoeff = 0;
 544 | 
 545 |         // L-like step shape ( only blur if it's a corner, not if it's two parallel edges)
 546 |         if( numberOfEdges == 2 )
 547 |             blurCoeff *= ( ( lpfloat(1.0) - fromBelow * fromAbove ) * ( lpfloat(1.0) - fromRight * fromLeft ) );   // either product is 1 when the two edges are opposite each other, zeroing the blur
 548 |     }
 549 | 
 550 |     // L-like step shape
 551 |     //[branch]
 552 |     if( numberOfEdges == 2 )
 553 |     {
 554 |         blurCoeff *= 0.75;
 555 | 
 556 | #if 1   // neighbour-aware weighting enabled
 557 |         float k = 0.9f;   // strength of the extra contribution taken from neighbouring edges
 558 | #if 0   // disabled simpler variant without the (1.0 - ...) exclusion terms
 559 |         fromRight   += k * (edges.g * edgesTop.r +      edges.a * edgesBottom.r );
 560 |         fromBelow   += k * (edges.r * edgesLeft.g +     edges.b * edgesRight.g );
 561 |         fromLeft    += k * (edges.g * edgesTop.b +      edges.a * edgesBottom.b );
 562 |         fromAbove   += k * (edges.b * edgesRight.a +    edges.r * edgesLeft.a );
 563 | #else   // active variant: each term is additionally gated by a (1.0 - neighbourEdge) factor
 564 |         fromRight   += k * (edges.g * edgesTop.r     * (1.0-edgesLeft.g)   +     edges.a * edgesBottom.r   * (1.0-edgesLeft.a)      );
 565 |         fromBelow   += k * (edges.b * edgesRight.g   * (1.0-edgesTop.b)    +     edges.r * edgesLeft.g     * (1.0-edgesTop.r)       );
 566 |         fromLeft    += k * (edges.a * edgesBottom.b  * (1.0-edgesRight.a)  +     edges.g * edgesTop.b      * (1.0-edgesRight.g)     );
 567 |         fromAbove   += k * (edges.r * edgesLeft.a    * (1.0-edgesBottom.r) +     edges.b * edgesRight.a   *  (1.0-edgesBottom.b)    );
 568 | #endif
 569 | #endif
 570 |     }
 571 | 
 572 |     // if( numberOfEdges == 3 )
 573 |     //     blurCoeff *= 0.95;
 574 | 
 575 |     // Dampen the blurring effect when lots of neighbouring edges - additionally preserves text and texture detail
 576 | #if CMAA2_EXTRA_SHARPNESS   // steeper falloff: less blur for the same number of surrounding edges
 577 |     blurCoeff *= saturate( 1.15 - numberOfEdgesAllAround / 8.0 );
 578 | #else
 579 |     blurCoeff *= saturate( 1.30 - numberOfEdgesAllAround / 10.0 );
 580 | #endif
 581 | 
 582 |     return lpfloat4( fromLeft, fromAbove, fromRight, fromBelow ) * blurCoeff;   // note: output order (left, above, right, below) differs from the (right, below, left, above) order of the input edges
 583 | }
 584 | 
 585 | uint LoadEdge( int2 pixelPos, int2 offset, uint msaaSampleIndex )   // reads the edge value stored for pixelPos + offset from g_workingEdges (masked to 4 bits on the packed paths)
 586 | {
 587 | #if CMAA_MSAA_SAMPLE_COUNT > 1   // MSAA: one texel holds a 4-bit group per sample
 588 |     uint edge = g_workingEdges.Load( pixelPos + offset ).x;
 589 |     edge = (edge >> (msaaSampleIndex*4)) & 0xF;
 590 | #else
 591 | #if CMAA_PACK_SINGLE_SAMPLE_EDGE_TO_HALF_WIDTH   // two horizontally adjacent pixels share one texel
 592 |     uint a      = uint(pixelPos.x+offset.x) % 2;   // 0 selects the low 4 bits (even x), 1 the next 4 bits (odd x)
 593 | 
 594 | #if CMAA2_EDGE_UNORM   // value is read as a normalized float: rescale by 255 and round to recover the integer
 595 |     uint edge   = (uint)(g_workingEdges.Load( uint2( uint(pixelPos.x+offset.x)/2, pixelPos.y + offset.y ) ).x * 255.0 + 0.5);
 596 | #else    
 597 |     uint edge   = g_workingEdges.Load( uint2( uint(pixelPos.x+offset.x)/2, pixelPos.y + offset.y ) ).x;
 598 | #endif
 599 |     edge = (edge >> (a*4)) & 0xF;
 600 | #else   // unpacked: one texel per pixel, returned without masking
 601 |     uint edge   = g_workingEdges.Load( pixelPos + offset ).x;
 602 | #endif
 603 | #endif
 604 |     return edge;
 605 | }
 606 | 
 607 | groupshared lpfloat4 g_groupShared2x2FracEdgesH[CMAA2_CS_INPUT_KERNEL_SIZE_X * CMAA2_CS_INPUT_KERNEL_SIZE_Y];   // one lpfloat4 per thread of the input kernel: the .y edge values of the four pixels in a 2x2 quad
 608 | groupshared lpfloat4 g_groupShared2x2FracEdgesV[CMAA2_CS_INPUT_KERNEL_SIZE_X * CMAA2_CS_INPUT_KERNEL_SIZE_Y];   // same layout, holding the .x edge values of the four pixels
 609 | // void GroupsharedLoadQuadH( uint addr, out lpfloat e00, out lpfloat e10, out lpfloat e01, out lpfloat e11 ) { lpfloat4 val = g_groupShared2x2FracEdgesH[addr]; e00 = val.x; e10 = val.y; e01 = val.z; e11 = val.w; }
 610 | // void GroupsharedLoadQuadV( uint addr, out lpfloat e00, out lpfloat e10, out lpfloat e01, out lpfloat e11 ) { lpfloat4 val = g_groupShared2x2FracEdgesV[addr]; e00 = val.x; e10 = val.y; e01 = val.z; e11 = val.w; }
 611 | void GroupsharedLoadQuadHV( uint addr, out lpfloat2 e00, out lpfloat2 e10, out lpfloat2 e01, out lpfloat2 e11 )   // reads one 2x2 quad from group-shared memory; each output gets .x from the V array and .y from the H array
 612 | {
 613 |     const lpfloat4 quadH = g_groupShared2x2FracEdgesH[addr]; const lpfloat4 quadV = g_groupShared2x2FracEdgesV[addr];
 614 |     e00 = lpfloat2( quadV.x, quadH.x ); e10 = lpfloat2( quadV.y, quadH.y ); e01 = lpfloat2( quadV.z, quadH.z ); e11 = lpfloat2( quadV.w, quadH.w );
 615 | }
 616 | 
 617 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 618 | // Edge detection compute shader
 619 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 620 | //groupshared uint g_groupShared2x2ProcColors[(CMAA2_CS_INPUT_KERNEL_SIZE_X * 2 + 1) * (CMAA2_CS_INPUT_KERNEL_SIZE_Y * 2 + 1)];
 621 | //groupshared float3 g_groupSharedResolvedMSColors[(CMAA2_CS_INPUT_KERNEL_SIZE_X * 2 + 1) * (CMAA2_CS_INPUT_KERNEL_SIZE_Y * 2 + 1)];
 622 | //
 623 | [numthreads( CMAA2_CS_INPUT_KERNEL_SIZE_X, CMAA2_CS_INPUT_KERNEL_SIZE_Y, 1 )]
 624 | void EdgesColor2x2CS( uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_GroupThreadID )
 625 | {
 626 |     // screen position in the input (expanded) kernel (shifted one 2x2 block up/left)
 627 |     uint2 pixelPos = groupID.xy * int2( CMAA2_CS_OUTPUT_KERNEL_SIZE_X, CMAA2_CS_OUTPUT_KERNEL_SIZE_Y ) + groupThreadID.xy - int2( 1, 1 );
 628 |     pixelPos *= int2( 2, 2 );
 629 | 
 630 |     const uint2 qeOffsets[4]        = { {0, 0}, {1, 0}, {0, 1}, {1, 1} };
 631 |     const uint rowStride2x2         = CMAA2_CS_INPUT_KERNEL_SIZE_X;
 632 |     const uint centerAddr2x2        = groupThreadID.x + groupThreadID.y * rowStride2x2;
 633 |     // const uint msaaSliceStride2x2   = CMAA2_CS_INPUT_KERNEL_SIZE_X * CMAA2_CS_INPUT_KERNEL_SIZE_Y;
 634 |     const bool inOutputKernel       = !any( bool4( groupThreadID.x == ( CMAA2_CS_INPUT_KERNEL_SIZE_X - 1 ), groupThreadID.x == 0, groupThreadID.y == ( CMAA2_CS_INPUT_KERNEL_SIZE_Y - 1 ), groupThreadID.y == 0 ) );
 635 | 
 636 |     uint i;
 637 |     lpfloat2 qe0, qe1, qe2, qe3;
 638 |     uint4 outEdges = { 0, 0, 0, 0 };
 639 | 
 640 | #if CMAA_MSAA_SAMPLE_COUNT > 1
 641 |     bool firstLoopIsEnough = false;
 642 | 
 643 |     #if CMAA_MSAA_USE_COMPLEXITY_MASK
 644 |     {
 645 |         float2 texSize;
 646 |         g_inColorMSComplexityMaskReadonly.GetDimensions( texSize.x, texSize.y );
 647 |         float2 gatherUV = float2(pixelPos) / texSize;
 648 |         float4 TL = g_inColorMSComplexityMaskReadonly.GatherRed( g_gather_point_clamp_Sampler, gatherUV, int2( 0, 0 ) );
 649 |         float4 TR = g_inColorMSComplexityMaskReadonly.GatherRed( g_gather_point_clamp_Sampler, gatherUV, int2( 2, 0 ) );
 650 |         float4 BL = g_inColorMSComplexityMaskReadonly.GatherRed( g_gather_point_clamp_Sampler, gatherUV, int2( 0, 2 ) );
 651 |         float4 BR = g_inColorMSComplexityMaskReadonly.GatherRed( g_gather_point_clamp_Sampler, gatherUV, int2( 2, 2 ) );
 652 |         float4 sumAll = TL+TR+BL+BR;
 653 |         firstLoopIsEnough = !any(sumAll);
 654 |     }
 655 |     #endif
 656 | #endif
 657 | 
 658 | 
 659 |     // not optimal - to be optimized
 660 | #if CMAA_MSAA_SAMPLE_COUNT > 1
 661 |     // clear this here to reduce complexity below - turns out it's quicker as well this way
 662 |     g_workingDeferredBlendItemListHeads[ uint2( pixelPos ) / 2 ] = 0xFFFFFFFF;
 663 |     [loop]
 664 |     for( uint msaaSampleIndex = 0; msaaSampleIndex < CMAA_MSAA_SAMPLE_COUNT; msaaSampleIndex++ )
 665 |     {
 666 |         bool msaaSampleIsRelevant = !firstLoopIsEnough || msaaSampleIndex == 0;
 667 |         [branch]
 668 |         if( msaaSampleIsRelevant )
 669 |         {
 670 | #else
 671 |         {
 672 |             uint msaaSampleIndex = 0;
 673 | #endif
 674 | 
 675 | 
 676 |             // edge detection
 677 | #if CMAA2_EDGE_DETECTION_LUMA_PATH == 0
 678 |             lpfloat3 pixelColors[3 * 3 - 1];
 679 |             [unroll]
 680 |             for( i = 0; i < 3 * 3 - 1; i++ )
 681 |                 pixelColors[i] = LoadSourceColor( pixelPos, int2( i % 3, i / 3 ), msaaSampleIndex ).rgb;
 682 | 
 683 |             [unroll]
 684 |             for( i = 0; i < 3 * 3 - 1; i++ )
 685 |                 pixelColors[i] = ProcessColorForEdgeDetect( pixelColors[i] );
 686 | 
 687 |             qe0 = ComputeEdge( 0, 0, pixelColors );
 688 |             qe1 = ComputeEdge( 1, 0, pixelColors );
 689 |             qe2 = ComputeEdge( 0, 1, pixelColors );
 690 |             qe3 = ComputeEdge( 1, 1, pixelColors );
 691 | #else // CMAA2_EDGE_DETECTION_LUMA_PATH != 0
 692 |             lpfloat pixelLumas[3 * 3 - 1];
 693 |     #if CMAA2_EDGE_DETECTION_LUMA_PATH == 1 // compute in-place
 694 |             [unroll]
 695 |             for( i = 0; i < 3 * 3 - 1; i++ )
 696 |             {
 697 |                 lpfloat3 color = LoadSourceColor( pixelPos, int2( i % 3, i / 3 ), msaaSampleIndex ).rgb;
 698 |                 pixelLumas[i] = RGBToLumaForEdges( color );
 699 |             }
 700 |     #elif CMAA2_EDGE_DETECTION_LUMA_PATH == 2 // source from outside
 701 |     #if 0 // same as below, just without Gather
 702 |             [unroll]
 703 |             for( i = 0; i < 3 * 3 - 1; i++ )
 704 |                  pixelLumas[i] = g_inLumaReadonly.Load( int3( pixelPos, 0 ), int2( i % 3, i / 3 ) ).r;
 705 |     #else
 706 |             float2 texSize;
 707 |             g_inLumaReadonly.GetDimensions( texSize.x, texSize.y );
 708 |             float2 gatherUV = (float2(pixelPos) + float2( 0.5, 0.5 )) / texSize;
 709 |             float4 TL = g_inLumaReadonly.GatherRed( g_gather_point_clamp_Sampler, gatherUV );
 710 |             float4 TR = g_inLumaReadonly.GatherRed( g_gather_point_clamp_Sampler, gatherUV, int2( 1, 0 ) );
 711 |             float4 BL = g_inLumaReadonly.GatherRed( g_gather_point_clamp_Sampler, gatherUV, int2( 0, 1 ) );
 712 |             pixelLumas[0] = TL.w; pixelLumas[1] = TL.z; pixelLumas[2] = TR.z; pixelLumas[3] = TL.x;
 713 |             pixelLumas[4] = TL.y; pixelLumas[5] = TR.y; pixelLumas[6] = BL.x; pixelLumas[7] = BL.y;
 714 |     #endif
 715 |     #elif CMAA2_EDGE_DETECTION_LUMA_PATH == 3 // source in alpha channel of input color
 716 |             float2 texSize;
 717 |             g_inoutColorReadonly.GetDimensions( texSize.x, texSize.y );
 718 |             float2 gatherUV = (float2(pixelPos) + float2( 0.5, 0.5 )) / texSize;
 719 |             float4 TL = g_inoutColorReadonly.GatherAlpha( g_gather_point_clamp_Sampler, gatherUV );
 720 |             float4 TR = g_inoutColorReadonly.GatherAlpha( g_gather_point_clamp_Sampler, gatherUV, int2( 1, 0 ) );
 721 |             float4 BL = g_inoutColorReadonly.GatherAlpha( g_gather_point_clamp_Sampler, gatherUV, int2( 0, 1 ) );
 722 |             pixelLumas[0] = (lpfloat)TL.w; pixelLumas[1] = (lpfloat)TL.z; pixelLumas[2] = (lpfloat)TR.z; pixelLumas[3] = (lpfloat)TL.x; 
 723 |             pixelLumas[4] = (lpfloat)TL.y; pixelLumas[5] = (lpfloat)TR.y; pixelLumas[6] = (lpfloat)BL.x; pixelLumas[7] = (lpfloat)BL.y;                 
 724 |     #endif
 725 |             qe0 = ComputeEdgeLuma( 0, 0, pixelLumas );
 726 |             qe1 = ComputeEdgeLuma( 1, 0, pixelLumas );
 727 |             qe2 = ComputeEdgeLuma( 0, 1, pixelLumas );
 728 |             qe3 = ComputeEdgeLuma( 1, 1, pixelLumas );
 729 | #endif
 730 | 
 731 |             g_groupShared2x2FracEdgesV[centerAddr2x2 + rowStride2x2 * 0] = lpfloat4( qe0.x, qe1.x, qe2.x, qe3.x );
 732 |             g_groupShared2x2FracEdgesH[centerAddr2x2 + rowStride2x2 * 0] = lpfloat4( qe0.y, qe1.y, qe2.y, qe3.y );
 733 |      
 734 | #if CMAA_MSAA_SAMPLE_COUNT > 1
 735 |          }  // if( msaaSampleIsRelevant )
 736 | #endif
 737 | 
 738 |         GroupMemoryBarrierWithGroupSync( );
 739 | 
 740 |         [branch]
 741 |         if( inOutputKernel )
 742 |         {
 743 |             lpfloat2 topRow         = g_groupShared2x2FracEdgesH[ centerAddr2x2 - rowStride2x2 ].zw;   // top row's bottom edge
 744 |             lpfloat2 leftColumn     = g_groupShared2x2FracEdgesV[ centerAddr2x2 - 1 ].yw;              // left column's right edge
 745 | 
 746 |             bool someNonZeroEdges = any( lpfloat4( qe0, qe1 ) + lpfloat4( qe2, qe3 ) + lpfloat4( topRow[0], topRow[1], leftColumn[0], leftColumn[1] ) );
 747 |             //bool someNonZeroEdges = packedCenterEdges.x | packedCenterEdges.y | (packedQuadP0M1.y & 0xFFFF0000) | (packedQuadM1P0.x & 0xFF00FF00);
 748 | 
 749 |             [branch]
 750 |             if( someNonZeroEdges )
 751 |             {
 752 |     #if CMAA_MSAA_SAMPLE_COUNT == 1
 753 |                 // Clear deferred color list heads to empty (if potentially needed - even though some edges might get culled by local contrast adaptation 
 754 |                 // step below, it's still cheaper to just clear it without additional logic)
 755 |             #if !defined(PLATFORM_NO_TEXTURE_ATOMICS)
 756 |                 g_workingDeferredBlendItemListHeads[ uint2( pixelPos ) / 2 ] = 0xFFFFFFFF;
 757 |             #else
 758 |                 uint quadPosFlat = pixelPos.x / 2 + pixelPos.y / 2 * g_workingDeferredBlendItemListHeads_Width;
 759 |                 g_workingDeferredBlendItemListHeads[ quadPosFlat ] = 0xFFFFFFFF;
 760 |             #endif 
 761 |     #endif
 762 | 
 763 |                 lpfloat4 ce[4];
 764 | 
 765 |             #if 1 // local contrast adaptation
 766 |                 lpfloat2 dummyd0, dummyd1, dummyd2;
 767 |                 lpfloat2 neighbourhood[4][4];
 768 | 
 769 |                 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 770 |                 // load & unpack kernel data from SLM
 771 |                 GroupsharedLoadQuadHV( centerAddr2x2 - rowStride2x2 - 1 , dummyd0, dummyd1, dummyd2, neighbourhood[0][0] );
 772 |                 GroupsharedLoadQuadHV( centerAddr2x2 - rowStride2x2     , dummyd0, dummyd1, neighbourhood[1][0], neighbourhood[2][0] );
 773 |                 GroupsharedLoadQuadHV( centerAddr2x2 - rowStride2x2 + 1 , dummyd0, dummyd1, neighbourhood[3][0], dummyd2 );
 774 |                 GroupsharedLoadQuadHV( centerAddr2x2 - 1                , dummyd0, neighbourhood[0][1], dummyd1, neighbourhood[0][2] );
 775 |                 GroupsharedLoadQuadHV( centerAddr2x2 + 1                , neighbourhood[3][1], dummyd0, neighbourhood[3][2], dummyd1 );
 776 |                 GroupsharedLoadQuadHV( centerAddr2x2 - 1 + rowStride2x2 , dummyd0, neighbourhood[0][3], dummyd1, dummyd2 );
 777 |                 GroupsharedLoadQuadHV( centerAddr2x2 + rowStride2x2     , neighbourhood[1][3], neighbourhood[2][3], dummyd0, dummyd1 );
 778 |                 neighbourhood[1][0].y = topRow[0]; // already in registers
 779 |                 neighbourhood[2][0].y = topRow[1]; // already in registers
 780 |                 neighbourhood[0][1].x = leftColumn[0]; // already in registers
 781 |                 neighbourhood[0][2].x = leftColumn[1]; // already in registers
 782 |                 neighbourhood[1][1] = qe0; // already in registers
 783 |                 neighbourhood[2][1] = qe1; // already in registers
 784 |                 neighbourhood[1][2] = qe2; // already in registers
 785 |                 neighbourhood[2][2] = qe3; // already in registers
 786 |                 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 787 |         
 788 |                 topRow[0]     = ( topRow[0]     - ComputeLocalContrastH( 0, -1, neighbourhood ) ) > GetActualEdgeThreshold();
 789 |                 topRow[1]     = ( topRow[1]     - ComputeLocalContrastH( 1, -1, neighbourhood ) ) > GetActualEdgeThreshold();
 790 |                 leftColumn[0] = ( leftColumn[0] - ComputeLocalContrastV( -1, 0, neighbourhood ) ) > GetActualEdgeThreshold();
 791 |                 leftColumn[1] = ( leftColumn[1] - ComputeLocalContrastV( -1, 1, neighbourhood ) ) > GetActualEdgeThreshold();
 792 | 
 793 |                 ce[0].x = ( qe0.x - ComputeLocalContrastV( 0, 0, neighbourhood ) ) > GetActualEdgeThreshold();
 794 |                 ce[0].y = ( qe0.y - ComputeLocalContrastH( 0, 0, neighbourhood ) ) > GetActualEdgeThreshold();
 795 |                 ce[1].x = ( qe1.x - ComputeLocalContrastV( 1, 0, neighbourhood ) ) > GetActualEdgeThreshold();
 796 |                 ce[1].y = ( qe1.y - ComputeLocalContrastH( 1, 0, neighbourhood ) ) > GetActualEdgeThreshold();
 797 |                 ce[2].x = ( qe2.x - ComputeLocalContrastV( 0, 1, neighbourhood ) ) > GetActualEdgeThreshold();
 798 |                 ce[2].y = ( qe2.y - ComputeLocalContrastH( 0, 1, neighbourhood ) ) > GetActualEdgeThreshold();
 799 |                 ce[3].x = ( qe3.x - ComputeLocalContrastV( 1, 1, neighbourhood ) ) > GetActualEdgeThreshold();
 800 |                 ce[3].y = ( qe3.y - ComputeLocalContrastH( 1, 1, neighbourhood ) ) > GetActualEdgeThreshold();
 801 |             #else
 802 |                 topRow[0]     = topRow[0]    > GetActualEdgeThreshold();
 803 |                 topRow[1]     = topRow[1]    > GetActualEdgeThreshold();
 804 |                 leftColumn[0] = leftColumn[0]> GetActualEdgeThreshold();
 805 |                 leftColumn[1] = leftColumn[1]> GetActualEdgeThreshold();
 806 |                 ce[0].x = qe0.x > GetActualEdgeThreshold();
 807 |                 ce[0].y = qe0.y > GetActualEdgeThreshold();
 808 |                 ce[1].x = qe1.x > GetActualEdgeThreshold();
 809 |                 ce[1].y = qe1.y > GetActualEdgeThreshold();
 810 |                 ce[2].x = qe2.x > GetActualEdgeThreshold();
 811 |                 ce[2].y = qe2.y > GetActualEdgeThreshold();
 812 |                 ce[3].x = qe3.x > GetActualEdgeThreshold();
 813 |                 ce[3].y = qe3.y > GetActualEdgeThreshold();
 814 |             #endif
 815 | 
 816 |                 //left
 817 |                 ce[0].z = leftColumn[0];
 818 |                 ce[1].z = ce[0].x;
 819 |                 ce[2].z = leftColumn[1];
 820 |                 ce[3].z = ce[2].x;
 821 | 
 822 |                 // top
 823 |                 ce[0].w = topRow[0];
 824 |                 ce[1].w = topRow[1];
 825 |                 ce[2].w = ce[0].y;
 826 |                 ce[3].w = ce[1].y;
 827 | 
 828 |                 [unroll]
 829 |                 for( i = 0; i < 4; i++ )
 830 |                 {
 831 |                     const uint2 localPixelPos = pixelPos + qeOffsets[i];
 832 | 
 833 |                     const lpfloat4 edges = ce[i];
 834 | 
 835 |                     // if there's at least one two edge corner, this is a candidate for simple or complex shape processing...
 836 |                     bool isCandidate = ( edges.x * edges.y + edges.y * edges.z + edges.z * edges.w + edges.w * edges.x ) != 0;
 837 |                     if( isCandidate )
 838 |                     {
 839 |                         uint counterIndex;  g_workingControlBuffer.InterlockedAdd( 4*4, 1, counterIndex );
 840 |                         g_workingShapeCandidates[counterIndex] = (localPixelPos.x << 18) | (msaaSampleIndex << 14) | localPixelPos.y;
 841 |                     }
 842 | 
 843 |                     // Write out edges - we write out all, including empty pixels, to make sure shape detection edge tracing
 844 |                     // doesn't continue on previous frame's edges that no longer exist.
 845 |                     uint packedEdge = PackEdges( edges );
 846 |     #if CMAA_MSAA_SAMPLE_COUNT > 1
 847 |                     outEdges[i] |= packedEdge << (msaaSampleIndex * 4);
 848 |     #else
 849 |                     outEdges[i] = packedEdge;
 850 |     #endif
 851 |                 }
 852 |             }
 853 |         }
 854 |     }
 855 | 
 856 |     // finally, write the edges!
 857 |     [branch]
 858 |     if( inOutputKernel )
 859 |     {
 860 | #if CMAA_PACK_SINGLE_SAMPLE_EDGE_TO_HALF_WIDTH && CMAA_MSAA_SAMPLE_COUNT == 1
 861 | #if CMAA2_EDGE_UNORM
 862 |         g_workingEdges[ int2(pixelPos.x/2, pixelPos.y+0) ] = ((outEdges[1] << 4) | outEdges[0]) / 255.0;
 863 |         g_workingEdges[ int2(pixelPos.x/2, pixelPos.y+1) ] = ((outEdges[3] << 4) | outEdges[2]) / 255.0;        
 864 | #else
 865 |         g_workingEdges[ int2(pixelPos.x/2, pixelPos.y+0) ] = (outEdges[1] << 4) | outEdges[0];
 866 |         g_workingEdges[ int2(pixelPos.x/2, pixelPos.y+1) ] = (outEdges[3] << 4) | outEdges[2];
 867 | #endif
 868 | #else
 869 |         {
 870 |             [unroll] for( uint i = 0; i < 4; i++ )
 871 |             g_workingEdges[pixelPos + qeOffsets[i]] = outEdges[i];
 872 |         }
 873 | #endif
 874 |     }
 875 | }
 876 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 877 | 
 878 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 879 | // Compute shaders used to generate DispatchIndirect() control buffer
 880 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 881 | //
 882 | // Compute dispatch arguments for the DispatchIndirect() that calls ProcessCandidatesCS and DeferredColorApply2x2CS
 883 | [numthreads( 1, 1, 1 )]
 884 | void ComputeDispatchArgsCS( uint3 groupID : SV_GroupID )
 885 | {
 886 |     // activated once on Dispatch( 2, 1, 1 )
 887 |     if( groupID.x == 1 )
 888 |     {
 889 |         // get current count
 890 |         uint shapeCandidateCount = g_workingControlBuffer.Load(4*4);
 891 | 
 892 |         // check for overflow!
 893 |         uint appendBufferMaxCount; uint appendBufferStride;
 894 |         GET_BUFFER_DIMENSIONS(g_workingShapeCandidates, appendBufferMaxCount, appendBufferStride);
 895 |         shapeCandidateCount = min( shapeCandidateCount, appendBufferMaxCount );
 896 | 
 897 |         // write dispatch indirect arguments for ProcessCandidatesCS
 898 |         g_workingExecuteIndirectBuffer.Store( 4*0, ( shapeCandidateCount + CMAA2_PROCESS_CANDIDATES_NUM_THREADS - 1 ) / CMAA2_PROCESS_CANDIDATES_NUM_THREADS );
 899 |         g_workingExecuteIndirectBuffer.Store( 4*1, 1 );                                                                                                       
 900 |         g_workingExecuteIndirectBuffer.Store( 4*2, 1 );                                                                                                       
 901 | 
 902 |         // write actual number of items to process in ProcessCandidatesCS
 903 |         g_workingControlBuffer.Store( 4*3, shapeCandidateCount );                                                                                     
 904 |     } 
 905 |     // activated once on Dispatch( 1, 2, 1 )
 906 |     else if( groupID.y == 1 )
 907 |     {
 908 |         // get current count
 909 |         uint blendLocationCount = g_workingControlBuffer.Load(4*8);
 910 | 
 911 |         // check for overflow!
 912 |         { 
 913 |             uint appendBufferMaxCount; uint appendBufferStride;
 914 |             GET_BUFFER_DIMENSIONS(g_workingDeferredBlendLocationList, appendBufferMaxCount, appendBufferStride);
 915 |             blendLocationCount = min( blendLocationCount, appendBufferMaxCount );
 916 |         }
 917 | 
 918 |         // write dispatch indirect arguments for DeferredColorApply2x2CS
 919 | #if CMAA2_DEFERRED_APPLY_THREADGROUP_SWAP
 920 |         g_workingExecuteIndirectBuffer.Store( 4*0, 1 );
 921 |         g_workingExecuteIndirectBuffer.Store( 4*1, ( blendLocationCount + CMAA2_DEFERRED_APPLY_NUM_THREADS - 1 ) / CMAA2_DEFERRED_APPLY_NUM_THREADS );
 922 | #else
 923 |         g_workingExecuteIndirectBuffer.Store( 4*0, ( blendLocationCount + CMAA2_DEFERRED_APPLY_NUM_THREADS - 1 ) / CMAA2_DEFERRED_APPLY_NUM_THREADS );
 924 |         g_workingExecuteIndirectBuffer.Store( 4*1, 1 );
 925 | #endif
 926 |         g_workingExecuteIndirectBuffer.Store( 4*2, 1 );
 927 | 
 928 |         // write actual number of items to process in DeferredColorApply2x2CS
 929 |         g_workingControlBuffer.Store( 4*3, blendLocationCount);
 930 | 
 931 |         // clear counters for next frame
 932 |         g_workingControlBuffer.Store( 4*4 , 0 );
 933 |         g_workingControlBuffer.Store( 4*8 , 0 );
 934 |         g_workingControlBuffer.Store( 4*12, 0 );
 935 |     }
 936 | }
 937 | //
 938 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 939 | 
 940 | 
 941 | void FindZLineLengths( out lpfloat lineLengthLeft, out lpfloat lineLengthRight, uint2 screenPos, uniform bool horizontal, uniform bool invertedZShape, const float2 stepRight, uint msaaSampleIndex )
 942 | {
 943 | // this enables additional conservativeness test but is pretty detrimental to the final effect so left disabled by default even when CMAA2_EXTRA_SHARPNESS is enabled
 944 | #define CMAA2_EXTRA_CONSERVATIVENESS2 0
 945 |     /////////////////////////////////////////////////////////////////////////////////////////////////////////
 946 |     // TODO: a cleaner and faster way to get to these - a precalculated array indexing maybe?
 947 |     uint maskLeft, bitsContinueLeft, maskRight, bitsContinueRight;
 948 |     {
 949 |         // Horizontal (vertical is the same, just rotated 90- counter-clockwise)
 950 |         // Inverted Z case:              // Normal Z case:
 951 |         //   __                          // __
 952 |         //  X|                           //  X|
 953 |         // --                            //   --
 954 |         uint maskTraceLeft, maskTraceRight;
 955 | #if CMAA2_EXTRA_CONSERVATIVENESS2
 956 |         uint maskStopLeft, maskStopRight;
 957 | #endif
 958 |         if( horizontal )
 959 |         {
 960 |             maskTraceLeft = 0x08; // tracing top edge
 961 |             maskTraceRight = 0x02; // tracing bottom edge
 962 | #if CMAA2_EXTRA_CONSERVATIVENESS2
 963 |             maskStopLeft = 0x01; // stop on right edge
 964 |             maskStopRight = 0x04; // stop on left edge
 965 | #endif
 966 |         }
 967 |         else
 968 |         {
 969 |             maskTraceLeft = 0x04; // tracing left edge
 970 |             maskTraceRight = 0x01; // tracing right edge
 971 | #if CMAA2_EXTRA_CONSERVATIVENESS2
 972 |             maskStopLeft = 0x08; // stop on top edge
 973 |             maskStopRight = 0x02; // stop on bottom edge
 974 | #endif
 975 |         }
 976 |         if( invertedZShape )
 977 |         {
 978 |             uint temp = maskTraceLeft;
 979 |             maskTraceLeft = maskTraceRight;
 980 |             maskTraceRight = temp;
 981 |         }
 982 |         maskLeft = maskTraceLeft;
 983 |         bitsContinueLeft = maskTraceLeft;
 984 |         maskRight = maskTraceRight;
 985 | #if CMAA2_EXTRA_CONSERVATIVENESS2
 986 |         maskLeft |= maskStopLeft;
 987 |         maskRight |= maskStopRight;
 988 | #endif
 989 |         bitsContinueRight = maskTraceRight;
 990 |     }
 991 |     /////////////////////////////////////////////////////////////////////////////////////////////////////////
 992 | 
 993 |     bool continueLeft = true;
 994 |     bool continueRight = true;
 995 |     lineLengthLeft = 1;
 996 |     lineLengthRight = 1;
 997 |     [loop]
 998 |     for( ; ; )
 999 |     {
1000 |         uint edgeLeft =     LoadEdge( screenPos.xy - stepRight * float(lineLengthLeft)          , int2( 0, 0 ), msaaSampleIndex );
1001 |         uint edgeRight =    LoadEdge( screenPos.xy + stepRight * ( float(lineLengthRight) + 1 ) , int2( 0, 0 ), msaaSampleIndex );
1002 | 
1003 |         // stop on encountering 'stopping' edge (as defined by masks)
1004 |         continueLeft    = continueLeft  && ( ( edgeLeft & maskLeft ) == bitsContinueLeft );
1005 |         continueRight   = continueRight && ( ( edgeRight & maskRight ) == bitsContinueRight );
1006 | 
1007 |         lineLengthLeft += continueLeft;
1008 |         lineLengthRight += continueRight;
1009 | 
1010 |         lpfloat maxLR = max( lineLengthRight, lineLengthLeft );
1011 | 
1012 |         // both stopped? cause the search end by setting maxLR to max length.
1013 |         if( !continueLeft && !continueRight )
1014 |             maxLR = (lpfloat)c_maxLineLength;
1015 | 
1016 |         // either the longer one is ahead of the smaller (already stopped) one by more than a factor of x, or both
1017 |         // are stopped - end the search.
1018 | #if CMAA2_EXTRA_SHARPNESS
1019 |         if( maxLR >= min( (lpfloat)c_maxLineLength, (1.20 * min( lineLengthRight, lineLengthLeft ) - 0.20) ) )
1020 | #else
1021 |         if( maxLR >= min( (lpfloat)c_maxLineLength, (1.25 * min( lineLengthRight, lineLengthLeft ) - 0.25) ) )
1022 | #endif
1023 |             break;
1024 |     }
1025 | }
1026 | 
// these are blendZ settings, determined empirically :)
// c_symmetryCorrectionOffset: CollectBlendZs/BlendZs multiply it by (lineLength % 2) for each side and subtract
// the results from the total length of the blend ramp.
static const lpfloat c_symmetryCorrectionOffset = lpfloat( 0.22 );
// c_dampeningEffect: CollectBlendZs/BlendZs multiply it by (lineLengthLeft + lineLengthRight - shapeQualityScore)
// and saturate() the product; that value then scales every per-pixel lerp weight.
// CMAA2_EXTRA_SHARPNESS selects the smaller constant.
#if CMAA2_EXTRA_SHARPNESS
static const lpfloat c_dampeningEffect          = lpfloat( 0.11 );
#else
static const lpfloat c_dampeningEffect          = lpfloat( 0.15 );
#endif
1034 | 
#if CMAA2_COLLECT_EXPAND_BLEND_ITEMS
// Expands one Z-shape into per-pixel blend items and appends them to the group-shared list
// (g_groupSharedBlendItems / g_groupSharedBlendItemCount) instead of blending right away.
//
// Each item is two 32-bit values:
//   .x  (screenPos.x << 18) | (msaaSampleIndex << 14) | screenPos.y
//   .y  bit 31: horizontal, bit 30: invertedZShape, from bit 20: step index + 256,
//       from bit 10: source offset + 256, low bits: saturate(lerpK) * 1023 rounded to nearest
//
// Returns false when the items do not fit below CMAA2_BLEND_ITEM_SLM_SIZE; nothing is written in that case.
// NOTE(review): the shared counter has already been advanced by then, so the reserved slots stay unwritten -
// confirm that the consumer copes with that.
// stepRight is not used here.
bool CollectBlendZs( uint2 screenPos, bool horizontal, bool invertedZShape, lpfloat shapeQualityScore, lpfloat lineLengthLeft, lpfloat lineLengthRight, float2 stepRight, uint msaaSampleIndex )
{
    const lpfloat leftOdd  = c_symmetryCorrectionOffset * lpfloat( lineLengthLeft % 2 );
    const lpfloat rightOdd = c_symmetryCorrectionOffset * lpfloat( lineLengthRight % 2 );

    const lpfloat dampenEffect = saturate( lpfloat(lineLengthLeft + lineLengthRight - shapeQualityScore) * c_dampeningEffect ) ;

    // inclusive range of step indices covered by the blend
    const lpfloat firstStep = -floor( ( lineLengthLeft + 1 ) / 2 ) + 1.0;
    const lpfloat lastStep  = floor( ( lineLengthRight + 1 ) / 2 );

    // reserve a contiguous run of slots in the shared list
    const uint blendItemCount = lastStep-firstStep+1;
    uint writeSlot;
    InterlockedAdd( g_groupSharedBlendItemCount, blendItemCount, writeSlot );
    // safety
    if( (writeSlot+blendItemCount) > CMAA2_BLEND_ITEM_SLM_SIZE )
        return false;

    const lpfloat totalLength = lpfloat(lastStep - firstStep) + 1 - leftOdd - rightOdd;
    const lpfloat lerpStep    = lpfloat(1.0) / totalLength;
    const lpfloat lerpFromK   = (0.5 - leftOdd - firstStep) * lerpStep;

    // parts of the encoding shared by every item of this shape
    const uint itemHeader    = (screenPos.x << 18) | (msaaSampleIndex << 14) | screenPos.y;
    const uint itemValStatic = (horizontal << 31) | (invertedZShape << 30);

    for( lpfloat stepIdx = firstStep; stepIdx <= lastStep; stepIdx++ )
    {
        const lpfloat lerpVal = lerpStep * stepIdx + lerpFromK;

        // steps past the center read from the opposite side and use the mirrored weight
        const bool    pastCenter = ( stepIdx > 0 );
        const lpfloat secondPart = pastCenter ? 1.0 : 0.0;
        const lpfloat srcOffset  = pastCenter ? -1.0 : 1.0;

        lpfloat lerpK = lerpVal * srcOffset + secondPart;
        lerpK *= dampenEffect;

        const uint packedStep   = (uint(stepIdx+256) /*& 0x3FF*/) << 20;
        const uint packedOffset = (uint(srcOffset+256) /*& 0x3FF*/ ) << 10;
        const uint packedLerpK  = uint( saturate(lerpK) * 1023 + 0.5 );

        int2 encodedItem;
        encodedItem.x = itemHeader;
        encodedItem.y = itemValStatic | packedStep | packedOffset | packedLerpK;
        g_groupSharedBlendItems[writeSlot++] = encodedItem;
    }
    return true;
}
#endif
1079 | 
// Blends one Z-shape in place: walks the pixels covered by the shape along stepRight and, for each one,
// lerps its color towards the neighbour across the edge, then hands the result to StoreColorSample.
//
//   screenPos                         pixel the shape was detected at
//   horizontal, invertedZShape        orientation of the shape; together they pick the blend direction
//   shapeQualityScore                 reduces the dampening factor (together with the two line lengths)
//   lineLengthLeft, lineLengthRight   extent of the shape to either side of screenPos
//   stepRight                         per-step pixel offset along the shape
//   msaaSampleIndex                   forwarded to LoadSourceColor / StoreColorSample
void BlendZs( uint2 screenPos, bool horizontal, bool invertedZShape, lpfloat shapeQualityScore, lpfloat lineLengthLeft, lpfloat lineLengthRight, float2 stepRight, uint msaaSampleIndex )
{
    // perpendicular to the traced line; flipped for the inverted shape
    const float2 normalShapeDir = ( horizontal ) ? ( float2( 0, -1 ) ) : ( float2( -1, 0 ) );
    const float2 blendDir       = ( invertedZShape ) ? ( -normalShapeDir ) : ( normalShapeDir );

    const lpfloat leftOdd  = c_symmetryCorrectionOffset * lpfloat( lineLengthLeft % 2 );
    const lpfloat rightOdd = c_symmetryCorrectionOffset * lpfloat( lineLengthRight % 2 );

    const lpfloat dampenEffect = saturate( lpfloat(lineLengthLeft + lineLengthRight - shapeQualityScore) * c_dampeningEffect ) ;

    // inclusive range of step indices covered by the blend
    const lpfloat firstStep = -floor( ( lineLengthLeft + 1 ) / 2 ) + 1.0;
    const lpfloat lastStep  = floor( ( lineLengthRight + 1 ) / 2 );

    const lpfloat totalLength = lpfloat(lastStep - firstStep) + 1 - leftOdd - rightOdd;
    const lpfloat lerpStep    = lpfloat(1.0) / totalLength;
    const lpfloat lerpFromK   = (0.5 - leftOdd - firstStep) * lerpStep;

    for( lpfloat stepIdx = firstStep; stepIdx <= lastStep; stepIdx++ )
    {
        const lpfloat lerpVal = lerpStep * stepIdx + lerpFromK;

        // steps past the center read from the opposite side and use the mirrored weight
        const bool    pastCenter = ( stepIdx > 0 );
        const lpfloat secondPart = pastCenter ? 1.0 : 0.0;
        const lpfloat srcOffset  = pastCenter ? -1.0 : 1.0;

        lpfloat lerpK = lerpVal * srcOffset + secondPart;
        lerpK *= dampenEffect;

        const float2 pixelPos = screenPos + stepRight * float(stepIdx);

        const lpfloat3 colorCenter = LoadSourceColor( pixelPos, int2( 0, 0 ), msaaSampleIndex ).rgb;
        const lpfloat3 colorFrom   = LoadSourceColor( pixelPos.xy + blendDir * float(srcOffset).xx, int2( 0, 0 ), msaaSampleIndex ).rgb;

        const lpfloat3 blended = lerp( colorCenter.rgb, colorFrom.rgb, lerpK );

        StoreColorSample( pixelPos.xy, blended, true, msaaSampleIndex );
    }
}
1120 | 
// TODO:
// There were issues with moving this (including the calling code) to half-float on some hardware (broke in certain cases on RX 480).
// Further investigation is required.
//
// Scores how well the edges around a pixel match a horizontal Z-shape, for both orientations.
//
//   edges       edges of the pixel itself
//   edgesM1P0   edges of the pixel one step to the left
//   edgesP1P0   edges of the pixel one step to the right
//   edgesP2P0   edges of the pixel two steps to the right
//   invertedZScore, normalZScore   (out) zero when one of the three required edges is missing;
//                                  otherwise raised by supporting edges and lowered by conflicting ones
//
// The caller also uses this for the vertical case by passing the neighbours and swizzles rotated by 90 degrees.
void DetectZsHorizontal( in lpfloat4 edges, in lpfloat4 edgesM1P0, in lpfloat4 edgesP1P0, in lpfloat4 edgesP2P0, out lpfloat invertedZScore, out lpfloat normalZScore )
{
    // Inverted Z case:
    //   __
    //  X|
    // --
    invertedZScore  = edges.r * edges.g * edgesP1P0.a;
    invertedZScore *= 2.0
                    + ( (edgesM1P0.g + edgesP2P0.a) )
                    - (edges.a + edgesP1P0.g)
                    - 0.7 * (edgesP2P0.g + edgesM1P0.a + edges.b + edgesP1P0.r);

    // Normal Z case:
    // __
    //  X|
    //   --
    normalZScore  = edges.r * edges.a * edgesP1P0.g;
    normalZScore *= 2.0
                  + ( (edgesM1P0.a + edgesP2P0.g) )
                  - (edges.g + edgesP1P0.a)
                  - 0.7 * (edgesP2P0.a + edgesM1P0.g + edges.b + edgesP1P0.r);
}
1144 | 
// Processes the shape candidate list, one candidate per thread (indexed by dispatchThreadID.x).
//
// For each candidate the kernel
//   1. blends the pixel with its four neighbours using the weights from ComputeSimpleShapeBlendValues,
//   2. looks for a horizontal or vertical Z-shape around the pixel and, when the shape is long enough,
//      either blends it in place (BlendZs) or queues per-pixel blend items in group-shared memory
//      (CollectBlendZs, only with CMAA2_COLLECT_EXPAND_BLEND_ITEMS).
// With CMAA2_COLLECT_EXPAND_BLEND_ITEMS, a second phase after a group barrier spreads the queued items over
// all threads of the group and applies them.
[numthreads( CMAA2_PROCESS_CANDIDATES_NUM_THREADS, 1, 1 )]
void ProcessCandidatesCS( uint3 dispatchThreadID : SV_DispatchThreadID, uint3 groupThreadID : SV_GroupThreadID )
{
#if CMAA2_COLLECT_EXPAND_BLEND_ITEMS
    // the first thread of the group resets the shared item counter; nobody may append before that is visible
    if( groupThreadID.x == 0 )
        g_groupSharedBlendItemCount = 0;
    GroupMemoryBarrierWithGroupSync( );
#endif

    uint msaaSampleIndex = 0;
    // item count written to this slot by ComputeDispatchArgsCS
    const uint numCandidates = g_workingControlBuffer.Load(4*3); //g_workingControlBuffer[3];
    // padding threads skip the block below instead of returning, so they still reach the barrier further down
    if( dispatchThreadID.x < numCandidates )
    {

	uint pixelID = g_workingShapeCandidates[dispatchThreadID.x];

#if 0 // debug display
    uint2 screenSize;
    g_inoutColorReadonly.GetDimensions( screenSize.x, screenSize.y );
    StoreColorSample( uint2(dispatchThreadID.x % screenSize.x, dispatchThreadID.x / screenSize.x), lpfloat3( 1, 1, 0 ), false, msaaSampleIndex );
    return;
#endif

    // unpack the candidate: x above bit 18, y in the low 14 bits, MSAA sample index in between
    uint2 pixelPos = uint2( (pixelID >> 18) /*& 0x3FFF*/, pixelID & 0x3FFF );
#if CMAA_MSAA_SAMPLE_COUNT > 1
    msaaSampleIndex = (pixelID >> 14) & 0x07;
#endif

    // NOTE(review): loadPosCenter is not referenced anywhere else in this function
#if CMAA_MSAA_SAMPLE_COUNT > 1
    int4 loadPosCenter = int4( pixelPos, msaaSampleIndex, 0 );
#else
    int3 loadPosCenter = int3( pixelPos, 0 );
#endif

    // edges of the pixel and of its four direct neighbours
    uint edgesCenterPacked = LoadEdge( pixelPos, int2( 0, 0 ), msaaSampleIndex );
    lpfloat4 edges      = UnpackEdgesFlt( edgesCenterPacked );
    lpfloat4 edgesLeft  = UnpackEdgesFlt( LoadEdge( pixelPos, int2( -1, 0 ), msaaSampleIndex ) );
    lpfloat4 edgesRight = UnpackEdgesFlt( LoadEdge( pixelPos, int2(  1, 0 ), msaaSampleIndex ) );
    lpfloat4 edgesBottom= UnpackEdgesFlt( LoadEdge( pixelPos, int2( 0,  1 ), msaaSampleIndex ) );
    lpfloat4 edgesTop   = UnpackEdgesFlt( LoadEdge( pixelPos, int2( 0, -1 ), msaaSampleIndex ) );
    
    // simple shapes
    {
        // blendVal holds one weight per neighbour: x = left, y = above, z = right, w = below
        lpfloat4 blendVal = ComputeSimpleShapeBlendValues( edges, edgesLeft, edgesRight, edgesTop, edgesBottom, true );

        // whatever weight the neighbours do not take stays with the center pixel
        const lpfloat fourWeightSum = dot( blendVal, lpfloat4( 1, 1, 1, 1 ) );
        const lpfloat centerWeight = 1.0 - fourWeightSum;

        lpfloat3 outColor = LoadSourceColor( pixelPos, int2( 0, 0 ), msaaSampleIndex ).rgb * centerWeight;
        [flatten]
        if( blendVal.x > 0.0 )   // from left
        {
            lpfloat3 pixelL = LoadSourceColor( pixelPos, int2( -1, 0 ), msaaSampleIndex ).rgb;
            outColor.rgb += blendVal.x * pixelL;
        }
        [flatten]
        if( blendVal.y > 0.0 )   // from above
        {
            lpfloat3 pixelT = LoadSourceColor( pixelPos, int2( 0, -1 ), msaaSampleIndex ).rgb; 
            outColor.rgb += blendVal.y * pixelT;
        }
        [flatten]
        if( blendVal.z > 0.0 )   // from right
        {
            lpfloat3 pixelR = LoadSourceColor( pixelPos, int2( 1, 0 ), msaaSampleIndex ).rgb;
            outColor.rgb += blendVal.z * pixelR;
        }
        [flatten]
        if( blendVal.w > 0.0 )   // from below
        {
            lpfloat3 pixelB = LoadSourceColor( pixelPos, int2( 0, 1 ), msaaSampleIndex ).rgb;
            outColor.rgb += blendVal.w * pixelB;
        }

        // stored with the 'complex shape' flag cleared
        StoreColorSample( pixelPos.xy, outColor, false, msaaSampleIndex );
    }

    // complex shapes - detect
    {
        lpfloat invertedZScore;
        lpfloat normalZScore;
        lpfloat maxScore;
        bool horizontal = true;
        bool invertedZ = false;
        // lpfloat shapeQualityScore;    // 0 - best quality, 1 - some edges missing but ok, 2 & 3 - dubious but better than nothing

        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        // horizontal
        {
            lpfloat4 edgesM1P0 = edgesLeft;
            lpfloat4 edgesP1P0 = edgesRight;
            lpfloat4 edgesP2P0 = UnpackEdgesFlt( LoadEdge( pixelPos, int2(  2, 0 ), msaaSampleIndex ) );

            DetectZsHorizontal( edges, edgesM1P0, edgesP1P0, edgesP2P0, invertedZScore, normalZScore );
            maxScore = max( invertedZScore, normalZScore );

            if( maxScore > 0 )
            {
                invertedZ = invertedZScore > normalZScore;
            }
        }
        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        // vertical
        {
            // Reuse the same code for vertical (used for horizontal above), but rotate input data 90 degrees counter-clockwise, so that:
            // left     becomes     bottom
            // top      becomes     left
            // right    becomes     top
            // bottom   becomes     right

            // we also have to rotate edges, thus .argb
            lpfloat4 edgesM1P0 = edgesBottom;
            lpfloat4 edgesP1P0 = edgesTop;
            lpfloat4 edgesP2P0 = UnpackEdgesFlt( LoadEdge( pixelPos, int2( 0, -2 ), msaaSampleIndex ) );

            DetectZsHorizontal( edges.argb, edgesM1P0.argb, edgesP1P0.argb, edgesP2P0.argb, invertedZScore, normalZScore );
            lpfloat vertScore = max( invertedZScore, normalZScore );

            // the vertical shape wins only with a strictly higher score than the horizontal one
            if( vertScore > maxScore )
            {
                maxScore = vertScore;
                horizontal = false;
                invertedZ = invertedZScore > normalZScore;
                //shapeQualityScore = floor( clamp(4.0 - maxScore, 0.0, 3.0) );
            }
        }
        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

        if( maxScore > 0 )
        {
#if CMAA2_EXTRA_SHARPNESS
            lpfloat shapeQualityScore = round( clamp(4.0 - maxScore, 0.0, 3.0) );    // 0 - best quality, 1 - some edges missing but ok, 2 & 3 - dubious but better than nothing
#else
            lpfloat shapeQualityScore = floor( clamp(4.0 - maxScore, 0.0, 3.0) );    // 0 - best quality, 1 - some edges missing but ok, 2 & 3 - dubious but better than nothing
#endif

            // for the vertical case 'right' points up the screen (negative y)
            const float2 stepRight = ( horizontal ) ? ( float2( 1, 0 ) ) : ( float2( 0, -1 ) );
            lpfloat lineLengthLeft, lineLengthRight;
            FindZLineLengths( lineLengthLeft, lineLengthRight, pixelPos, horizontal, invertedZ, stepRight, msaaSampleIndex );

            // lower quality shapes get both measured lengths shortened
            lineLengthLeft  -= shapeQualityScore;
            lineLengthRight -= shapeQualityScore;

            // shapes shorter than this combined length are left to the simple shape blend above
            if( ( lineLengthLeft + lineLengthRight ) >= (5.0) )
            {
#if CMAA2_COLLECT_EXPAND_BLEND_ITEMS
                // try adding to SLM but fall back to in-place processing if full (which only really happens in synthetic test cases)
                if( !CollectBlendZs( pixelPos, horizontal, invertedZ, shapeQualityScore, lineLengthLeft, lineLengthRight, stepRight, msaaSampleIndex ) )
#endif
                    BlendZs( pixelPos, horizontal, invertedZ, shapeQualityScore, lineLengthLeft, lineLengthRight, stepRight, msaaSampleIndex );
            }
        }
    }

    }

#if CMAA2_COLLECT_EXPAND_BLEND_ITEMS
    // wait until every thread of the group has finished queueing its blend items
    GroupMemoryBarrierWithGroupSync( );
    
    // the counter can exceed the list size when a CollectBlendZs call was rejected, hence the clamp
    // NOTE(review): a rejected CollectBlendZs call has already advanced the counter without writing its slots, so
    // slots below the clamped count may hold no valid item - confirm whether that can happen in practice
    uint totalItemCount = min( CMAA2_BLEND_ITEM_SLM_SIZE, g_groupSharedBlendItemCount );

    // spread items into waves
    // thread t handles items t, t + NUM_THREADS, t + 2*NUM_THREADS, ... below totalItemCount
    uint loops = (totalItemCount+(CMAA2_PROCESS_CANDIDATES_NUM_THREADS-1)-groupThreadID.x)/CMAA2_PROCESS_CANDIDATES_NUM_THREADS;

    for( uint loop = 0; loop < loops; loop++ )
    {
        uint    index           = loop*CMAA2_PROCESS_CANDIDATES_NUM_THREADS + groupThreadID.x;

        uint2   itemVal         = g_groupSharedBlendItems[index];

        // itemVal.x uses the same position/sample packing as the shape candidates
        uint2   startingPos     = uint2( (itemVal.x >> 18) /*& 0x3FFF*/, itemVal.x & 0x3FFF );
        uint itemMSAASampleIndex= 0;
#if CMAA_MSAA_SAMPLE_COUNT > 1
        itemMSAASampleIndex     = (itemVal.x >> 14) & 0x07;
#endif

        // itemVal.y: bit 31 horizontal, bit 30 inverted Z, then three 10-bit fields;
        // step index and source offset are stored biased by 256, the lerp factor is stored scaled by 1023
        bool    itemHorizontal  = (itemVal.y >> 31) & 1;
        bool    itemInvertedZ   = (itemVal.y >> 30) & 1;
        lpfloat itemStepIndex   = float((itemVal.y >> 20) & 0x3FF) - 256.0;
        lpfloat itemSrcOffset   = ((itemVal.y >> 10) & 0x3FF) - 256.0;
        lpfloat itemLerpK       = (itemVal.y & 0x3FF) / 1023.0;

        // same step and blend directions as used by BlendZs and its caller
        lpfloat2 itemStepRight    = ( itemHorizontal ) ? ( lpfloat2( 1, 0 ) ) : ( lpfloat2( 0, -1 ) );
        lpfloat2 itemBlendDir     = ( itemHorizontal ) ? ( lpfloat2( 0, -1 ) ) : ( lpfloat2( -1, 0 ) );
        if( itemInvertedZ )
            itemBlendDir = -itemBlendDir;

        uint2 itemPixelPos      = startingPos + itemStepRight * lpfloat(itemStepIndex);

        lpfloat3 colorCenter    = LoadSourceColor( itemPixelPos, int2( 0, 0 ), itemMSAASampleIndex ).rgb;
        lpfloat3 colorFrom      = LoadSourceColor( itemPixelPos.xy + itemBlendDir * lpfloat(itemSrcOffset).xx, int2( 0, 0 ), itemMSAASampleIndex ).rgb;
        
        lpfloat3 outputColor    = lerp( colorCenter.rgb, colorFrom.rgb, itemLerpK );

        // stored with the 'complex shape' flag set
        StoreColorSample( itemPixelPos.xy, outputColor, true, itemMSAASampleIndex );
    }
#endif

}
1346 | 
// Resolves the deferred blend items of one 2x2 quad per group slice: four threads share a quad, one per pixel.
// Each thread walks the quad's linked list of blend items, accumulates the weighted colors addressed to its
// own pixel and writes the normalized result with FinalUAVStore. Pixels that received no item are left alone.
//
// CMAA2_DEFERRED_APPLY_THREADGROUP_SWAP only changes which thread-group axis carries the list index and which
// one carries the pixel's position inside the quad.
#if CMAA2_DEFERRED_APPLY_THREADGROUP_SWAP
[numthreads( 4, CMAA2_DEFERRED_APPLY_NUM_THREADS, 1 )]
#else
[numthreads( CMAA2_DEFERRED_APPLY_NUM_THREADS, 4, 1 )]
#endif
void DeferredColorApply2x2CS( uint3 dispatchThreadID : SV_DispatchThreadID, uint3 groupThreadID : SV_GroupThreadID )
{
#if CMAA2_DEFERRED_APPLY_THREADGROUP_SWAP
    const uint locationIndex = dispatchThreadID.y;
    const uint quadCorner    = groupThreadID.x;
#else
    const uint locationIndex = dispatchThreadID.x;
    const uint quadCorner    = groupThreadID.y;
#endif

    // item count written by ComputeDispatchArgsCS; threads past it are padding
    if( locationIndex >= g_workingControlBuffer.Load(4*3) )
        return;

    // quad position: x in the high 16 bits, y in the low 16 bits
    const uint  packedQuadPos = g_workingDeferredBlendLocationList[locationIndex];
    const uint2 quadPos       = uint2( (packedQuadPos >> 16), packedQuadPos & 0xFFFF );

    // corners 0..3 map to offsets (0,0), (1,0), (0,1), (1,1)
    const uint2 pixelPos      = quadPos*2 + uint2( quadCorner & 1, quadCorner >> 1 );

    // head of this quad's linked list of blend items
#if !defined(PLATFORM_NO_TEXTURE_ATOMICS)
    uint nextItem = g_workingDeferredBlendItemListHeads[quadPos];
#else
    const uint headSlot = quadPos.x + quadPos.y * g_workingDeferredBlendItemListHeads_Width;
    uint nextItem = g_workingDeferredBlendItemListHeads[headSlot];
#endif

    // rgb accumulates weighted color, a accumulates the weights
#if CMAA_MSAA_SAMPLE_COUNT > 1
    lpfloat4 accum[CMAA_MSAA_SAMPLE_COUNT];
    [unroll]
    for( uint clearIndex = 0; clearIndex < CMAA_MSAA_SAMPLE_COUNT; clearIndex++ )
        accum[clearIndex] = lpfloat4( 0, 0, 0, 0 );
    bool anyContribution = false;
#else
    lpfloat4 accum = lpfloat4( 0, 0, 0, 0 );
#endif

    const uint maxLoops = 32*CMAA_MSAA_SAMPLE_COUNT;   // do the loop to prevent bad data hanging the GPU <- probably not needed
    for( uint visited = 0; (nextItem != 0xFFFFFFFF) && ( visited < maxLoops); visited ++ )
    {
        // decode item-specific info: {2 bits for 2x2 quad location}, {3 bits for MSAA sample index}, {1 bit for isComplexShape flag}, {26 bits for address}
        const uint header         = nextItem;
        const uint itemCorner     = (header >> 30) & 0x03;
        const uint itemSample     = (header >> 27) & 0x07;
        const bool isComplexShape = (header >> 26) & 0x01;

        // .x links to the next item, .y is the packed color
        const uint2 item = g_workingDeferredBlendItemList[ header & ((1 << 26) - 1) ];
        nextItem = item.x;

        // the list is shared by the whole quad; skip items addressed to the other three pixels
        if( itemCorner != quadCorner )
            continue;

        const lpfloat3 color  = InternalUnpackColor(item.y);
        const lpfloat  weight = 0.8 + 1.0 * lpfloat(isComplexShape);
#if CMAA_MSAA_SAMPLE_COUNT > 1
        accum[itemSample] += lpfloat4( color * weight, weight );
        anyContribution = true;
#else
        accum += lpfloat4( color * weight, weight );
#endif
    }

#if CMAA_MSAA_SAMPLE_COUNT > 1
    if( !anyContribution )
        return;

    // average over all samples; samples without any blend item contribute their source color
    lpfloat4 resolved = 0;
    for( uint sampleIndex = 0; sampleIndex < CMAA_MSAA_SAMPLE_COUNT; sampleIndex++ )
    {
        if( accum[sampleIndex].a != 0 )
            resolved.xyz += accum[sampleIndex].rgb / (accum[sampleIndex].a);
        else
            resolved.xyz += LoadSourceColor( pixelPos, int2(0, 0), sampleIndex );
    }
    resolved /= (lpfloat)CMAA_MSAA_SAMPLE_COUNT;
#else
    if( accum.a == 0 )
        return;

    // normalize by the accumulated weight
    lpfloat4 resolved = accum;
    resolved.rgb /= resolved.a;
#endif

    FinalUAVStore( pixelPos, lpfloat3(resolved.rgb) );
}
1442 | 
1443 | [numthreads( 16, 16, 1 )]
1444 | void DebugDrawEdgesCS( uint2 dispatchThreadID : SV_DispatchThreadID )    // debug kernel: one thread per pixel, passes a color derived from the pixel's unpacked edge value to FinalUAVStore
1445 | {
1446 |     int msaaSampleIndex = 0;    // only sample index 0 is visualized
1447 |     lpfloat4 edges = UnpackEdgesFlt( LoadEdge( dispatchThreadID, int2( 0, 0 ), msaaSampleIndex ) );
1448 | 
1449 |     // show MSAA control mask
1450 |     // uint v = g_inColorMSComplexityMaskReadonly.Load( int3( dispatchThreadID, 0 ) );
1451 |     // FinalUAVStore( dispatchThreadID, float3( v, v, v ) );
1452 |     // return;
1453 | 
1454 | #if 0   // compiled out: alternative debug view based on the MSAA complexity mask
1455 | #if CMAA_MSAA_SAMPLE_COUNT > 1
1456 |     uint2 pixelPos = dispatchThreadID.xy / 2 * 2;
1457 |     /*
1458 |     uint all2x2MSSamplesDifferent = 0;
1459 | 
1460 |      [unroll] for( uint x = 0; x < 4; x++ )
1461 |          [unroll] for( uint y = 0; y < 4; y++ )
1462 |              all2x2MSSamplesDifferent |= g_inColorMSComplexityMaskReadonly.Load( int3( pixelPos, 0 ), int2( x-1, y-1 ) ) > 0;
1463 |     bool firstLoopIsEnough = all2x2MSSamplesDifferent == 0;
1464 |     */
1465 |     
1466 | #if CMAA_MSAA_USE_COMPLEXITY_MASK
1467 |     float2 texSize;
1468 |     g_inColorMSComplexityMaskReadonly.GetDimensions( texSize.x, texSize.y );
1469 |     float2 gatherUV = float2(pixelPos) / texSize;
1470 |     float4 TL = g_inColorMSComplexityMaskReadonly.GatherRed( g_gather_point_clamp_Sampler, gatherUV, int2( 0, 0 ) );
1471 |     float4 TR = g_inColorMSComplexityMaskReadonly.GatherRed( g_gather_point_clamp_Sampler, gatherUV, int2( 2, 0 ) );
1472 |     float4 BL = g_inColorMSComplexityMaskReadonly.GatherRed( g_gather_point_clamp_Sampler, gatherUV, int2( 0, 2 ) );
1473 |     float4 BR = g_inColorMSComplexityMaskReadonly.GatherRed( g_gather_point_clamp_Sampler, gatherUV, int2( 2, 2 ) );
1474 |     float4 sumAll = TL+TR+BL+BR;
1475 |     bool firstLoopIsEnough = !any(sumAll);
1476 | 
1477 |     //all2x2MSSamplesDifferent = (all2x2MSSamplesDifferent != 0)?(CMAA_MSAA_SAMPLE_COUNT):(1);
1478 |     FinalUAVStore( dispatchThreadID, (firstLoopIsEnough).xxx );
1479 |     return;
1480 | #endif
1481 | #endif
1482 | #endif
1483 | 
1484 | 
1485 |     //if( any(edges) )
1486 |     {
1487 |         lpfloat4 outputColor = lpfloat4( lerp( edges.xyz, (0.5).xxx, edges.a * 0.2 ), 1.0 );    // edges.xyz shown as RGB, pulled toward mid-grey by 20% of edges.a
1488 |         FinalUAVStore( dispatchThreadID, outputColor.rgb );
1489 |     }
1490 | 
1491 | //#if CMAA2_EDGE_DETECTION_LUMA_PATH == 2
1492 | //    FinalUAVStore( dispatchThreadID, g_inLumaReadonly.Load( int3( dispatchThreadID.xy, 0 ) ).r );
1493 | //#endif
1494 | }
1495 | 
1496 | #endif // #ifndef __cplusplus
1497 | 
1498 | #endif // #ifndef __CMAA2_HLSL__
1499 | 


--------------------------------------------------------------------------------
/Core/CMAA2.hlsl.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: 2d0fad4f2384140138d571b10ff5779e
3 | ShaderIncludeImporter:
4 |   externalObjects: {}
5 |   userData: 
6 |   assetBundleName: 
7 |   assetBundleVariant: 
8 | 


--------------------------------------------------------------------------------
/Core/CMAA2RenderFeature.cs:
--------------------------------------------------------------------------------
 1 | using UnityEngine;
 2 | using UnityEngine.Rendering;
 3 | using UnityEngine.Rendering.RenderGraphModule;
 4 | using UnityEngine.Rendering.Universal;
 5 | 
 6 | namespace CMAA2.Core
 7 | {
 8 |     /// <summary>Renderer feature that schedules the CMAA2 pass before URP post-processing.</summary>
 9 |     public class CMAA2RenderFeature : ScriptableRendererFeature
10 |     {
11 |         public ComputeShader CMAA2Compute;
12 |         private CMAA2RenderPass _pass;
13 | 
14 |         public override void Create()
15 |         {
16 |             // Without a compute shader there is nothing to dispatch, so no pass is created.
17 |             _pass = CMAA2Compute == null
18 |                 ? null
19 |                 : new CMAA2RenderPass(CMAA2Compute) { renderPassEvent = RenderPassEvent.BeforeRenderingPostProcessing };
20 |         }
21 | 
22 |         public override void AddRenderPasses(ScriptableRenderer renderer, ref RenderingData renderingData)
23 |         {
24 |             if (_pass != null) renderer.EnqueuePass(_pass); // unset while no compute shader is assigned
25 |         }
26 |     }
27 | }
28 | 


--------------------------------------------------------------------------------
/Core/CMAA2RenderFeature.cs.meta:
--------------------------------------------------------------------------------
 1 | fileFormatVersion: 2
 2 | guid: 3a15937ac5f4421997dec111705d6d02
 3 | MonoImporter:
 4 |   externalObjects: {}
 5 |   serializedVersion: 2
 6 |   defaultReferences:
 7 |   - CMAA2Compute: {fileID: 7200000, guid: bdca705f137f466380668861f6afe032, type: 3}
 8 |   executionOrder: 0
 9 |   icon: {instanceID: 0}
10 |   userData: 
11 |   assetBundleName: 
12 |   assetBundleVariant: 
13 | 


--------------------------------------------------------------------------------
/Core/CMAA2RenderPass.cs:
--------------------------------------------------------------------------------
  1 | #define CMAA2_REQUIRE_UNITY_FIX
  2 | 
  3 | using UnityEngine;
  4 | using UnityEngine.Experimental.Rendering;
  5 | using UnityEngine.Rendering;
  6 | using UnityEngine.Rendering.RenderGraphModule;
  7 | using UnityEngine.Rendering.Universal;
  8 | 
  9 | namespace CMAA2.Core
 10 | {
 11 |     public class CMAA2RenderPass : ScriptableRenderPass
 12 |     {
 13 |         private const int m_TextureSampleCount = 1;
 14 | 
 15 |         private readonly CMAA2Compute _compute;
 16 | 
 17 |         /// <summary>Creates the pass and the <see cref="CMAA2Compute"/> instance it dispatches kernels through.</summary>
 18 |         /// <param name="cmaa2Compute">Compute shader forwarded to the <see cref="CMAA2Compute"/> constructor.</param>
 19 |         public CMAA2RenderPass(ComputeShader cmaa2Compute) =>
 20 |             _compute = new CMAA2Compute(cmaa2Compute);
 21 | 
 22 |         private class PassData // filled in RecordRenderGraph, consumed by Render
 23 |         {
 24 |             public CMAA2Compute Compute; // object whose kernel methods Render calls
 25 |             public TextureHandle ActualFrameColor; // active camera color: blit source before the kernels, blit destination after
 26 | 
 27 |             public Vector2Int FrameBufferSize; // width/height taken from the camera target descriptor
 28 |             public TextureHandle ColorBackBuffer; // RWTexture2D<float4> : u0
 29 |             public TextureHandle WorkingEdges; // RWTexture2D<uint> : u1
 30 |             public SizedBufferHandle WorkingShapeCandidates; // RWStructuredBuffer<uint> : u2
 31 |             public SizedBufferHandle WorkingDeferredBlendLocationList; // RWStructuredBuffer<uint> : u3
 32 |             public BufferHandle WorkingDeferredBlendItemList; // RWStructuredBuffer<uint2> : u4
 33 |             public AtomicTextureHandle WorkingDeferredBlendItemListHeads; // [RWTexture2D|RWStructuredBuffer]<uint> : u5
 34 |             public BufferHandle WorkingControlBuffer; // RWByteAddressBuffer : u6
 35 |             public BufferHandle WorkingExecuteIndirectBuffer; // RWByteAddressBuffer : u7
 36 |         }
 37 | 
 38 |         /// <summary>Records the CMAA2 unsafe pass: its transient working resources and the <see cref="Render"/> callback.</summary>
 39 |         /// <param name="renderGraph">Render graph the pass is added to.</param>
 40 |         /// <param name="frameData">Frame container the URP resource and camera data are read from.</param>
 41 |         public override void RecordRenderGraph(RenderGraph renderGraph, ContextContainer frameData)
 42 |         {
 43 |             var resourceData = frameData.Get<UniversalResourceData>();
 44 |             var cameraData = frameData.Get<UniversalCameraData>();
 45 |             var targetDesc = cameraData.cameraTargetDescriptor;
 46 | 
 47 |             var resX = targetDesc.width;
 48 |             var resY = targetDesc.height;
 49 | 
 50 |             using var builder = renderGraph.AddUnsafePass<PassData>(passName: "CMAA2", passData: out var passData);
 51 |             passData.Compute = _compute;
 52 | 
 53 |             passData.FrameBufferSize = new Vector2Int(resX, resY);
 54 |             passData.ActualFrameColor = resourceData.activeColorTexture;
 55 |             // Render() blits the anti-aliased result back into this texture, so declare write access too.
 56 |             builder.UseTexture(resourceData.activeColorTexture, AccessFlags.ReadWrite);
 57 | 
 58 |             var colorBackBufferDesc = new TextureDesc(resX, resY)
 59 |             {
 60 |                 name = "_ColorBackBufferRW",
 61 |                 format = resourceData.cameraColor.GetDescriptor(renderGraph).colorFormat,
 62 |                 enableRandomWrite = true,
 63 |             };
 64 |             passData.ColorBackBuffer = builder.CreateTransientTexture(colorBackBufferDesc);
 65 |             builder.UseTexture(passData.ColorBackBuffer, AccessFlags.ReadWrite);
 66 | 
 67 |             // Create the transient working textures
 68 |             {
 69 |                 int edgesResX = m_TextureSampleCount == 1 ? (resX + 1) / 2 : resX;
 70 |                 var graphicsFormat = m_TextureSampleCount switch
 71 |                 {
 72 |                     1 or 2 => GraphicsFormat.R8_UInt,
 73 |                     4 => GraphicsFormat.R16_UInt,
 74 |                     8 => GraphicsFormat.R32_UInt,
 75 |                     _ => GraphicsFormat.R8_UInt,
 76 |                 };
 77 |                 var uintUAVTextureDesc = new TextureDesc(width: edgesResX, height: resY)
 78 |                 {
 79 |                     format = graphicsFormat,
 80 |                     enableRandomWrite = true,
 81 |                 };
 82 |                 passData.WorkingEdges = builder.CreateTransientTexture(desc: in uintUAVTextureDesc);
 83 |                 builder.UseTexture(passData.WorkingEdges, AccessFlags.ReadWrite);
 84 | 
 85 |                 // One list head per 2x2 pixel quad, hence the halved (rounded-up) dimensions.
 86 |                 passData.WorkingDeferredBlendItemListHeads = AtomicTextureHandle.CreateTransientUint(builder, (resX + 1) / 2, (resY + 1) / 2);
 87 |             }
 88 | 
 89 |             // Buffers
 90 |             int requiredCandidatePixels = resX * resY / 4 * m_TextureSampleCount;
 91 |             int requiredDeferredColorApplyBuffer = resX * resY / 2 * m_TextureSampleCount;
 92 |             int requiredListHeadsPixels = (resX * resY + 3) / 6;
 93 | 
 94 |             // Create buffer for storing a list of all pixel candidates to process (potential AA shapes, both simple and complex)
 95 |             {
 96 |                 var desc = new BufferDesc(
 97 |                     count: requiredCandidatePixels,
 98 |                     stride: sizeof(uint),
 99 |                     target: GraphicsBuffer.Target.Structured
100 |                 );
101 |                 passData.WorkingShapeCandidates = new SizedBufferHandle(
102 |                     builder.CreateTransientBuffer(desc: in desc),
103 |                     desc.count
104 |                 );
105 | #if CMAA2_REQUIRE_UNITY_FIX
106 |                 builder.UseBuffer(passData.WorkingShapeCandidates.Buffer, AccessFlags.ReadWrite);
107 | #endif
108 |             }
109 | 
110 |             // Create buffer for storing linked list of all output values to blend
111 |             {
112 |                 var desc = new BufferDesc(
113 |                     requiredDeferredColorApplyBuffer,
114 |                     sizeof(uint) * 2,
115 |                     GraphicsBuffer.Target.Structured
116 |                 );
117 |                 passData.WorkingDeferredBlendItemList = builder.CreateTransientBuffer(desc);
118 | #if CMAA2_REQUIRE_UNITY_FIX
119 |                 builder.UseBuffer(passData.WorkingDeferredBlendItemList, AccessFlags.ReadWrite);
120 | #endif
121 |             }
122 | 
123 |             // Create buffer for storing a list of coordinates of linked list heads quads, to allow for combined processing in the last step
124 |             {
125 |                 var desc = new BufferDesc(
126 |                     count: requiredListHeadsPixels,
127 |                     stride: sizeof(uint),
128 |                     target: GraphicsBuffer.Target.Structured
129 |                 );
130 |                 passData.WorkingDeferredBlendLocationList = new SizedBufferHandle(
131 |                     builder.CreateTransientBuffer(desc),
132 |                     desc.count
133 |                 );
134 | #if CMAA2_REQUIRE_UNITY_FIX
135 |                 builder.UseBuffer(passData.WorkingDeferredBlendLocationList.Buffer, AccessFlags.ReadWrite);
136 | #endif
137 |             }
138 | 
139 |             // Control buffer (always the same size, doesn't need re-creating but oh well)
140 |             {
141 |                 var desc = new BufferDesc(count: 16, stride: sizeof(uint), target: GraphicsBuffer.Target.Raw);
142 |                 passData.WorkingControlBuffer = builder.CreateTransientBuffer(desc: in desc);
143 | #if CMAA2_REQUIRE_UNITY_FIX
144 |                 builder.UseBuffer(passData.WorkingControlBuffer, AccessFlags.ReadWrite);
145 | #endif
146 |             }
147 | 
148 |             // Indirect dispatch arguments buffer (always the same size, doesn't need re-creating but oh well)
149 |             {
150 |                 var desc = new BufferDesc(
151 |                     count: 4,
152 |                     stride: sizeof(uint),
153 |                     target: GraphicsBuffer.Target.Raw | GraphicsBuffer.Target.IndirectArguments
154 |                 );
155 |                 passData.WorkingExecuteIndirectBuffer = builder.CreateTransientBuffer(desc: in desc);
156 | #if CMAA2_REQUIRE_UNITY_FIX
157 |                 builder.UseBuffer(passData.WorkingExecuteIndirectBuffer, AccessFlags.ReadWrite);
158 | #endif
159 |             }
160 | 
161 |             builder.AllowPassCulling(false);
162 |             builder.SetRenderFunc<PassData>(Render);
163 |         }
164 | 
165 |         /// <summary>Runs the CMAA2 kernels on a random-write copy of the frame and copies the result back.</summary>
166 |         /// <param name="data">Resources recorded for this pass by <see cref="RecordRenderGraph"/>.</param>
167 |         /// <param name="context">Render-graph context supplying the command buffer.</param>
168 |         private static void Render(PassData data, UnsafeGraphContext context)
169 |         {
170 |             var cmd = context.cmd;
171 |             var cmaa2 = data.Compute;
172 |             var rawCmd = CommandBufferHelpers.GetNativeCommandBuffer(cmd);
173 | 
174 |             // Stage the camera color in the random-write back buffer: it is the color input of the
175 |             // kernels below and the output of the final apply step.
176 |             rawCmd.Blit(data.ActualFrameColor, data.ColorBackBuffer);
177 | 
178 |             // 1) Edge detection
179 |             cmaa2.EdgesColor2x2CS(cmd: cmd,
180 |                 inColorTexture: data.ColorBackBuffer,
181 |                 textureResolution: data.FrameBufferSize,
182 |                 workingEdges: data.WorkingEdges,
183 |                 workingShapeCandidates: data.WorkingShapeCandidates,
184 |                 workingDeferredBlendItemListHeads: data.WorkingDeferredBlendItemListHeads,
185 |                 workingControlBuffer: data.WorkingControlBuffer);
186 | 
187 |             // 2) Arguments for the first indirect dispatch
188 |             cmaa2.ComputeDispatchArgsCS(cmd: cmd,
189 |                 threadGroupsX: 2,
190 |                 threadGroupsY: 1,
191 |                 workingShapeCandidates: data.WorkingShapeCandidates,
192 |                 workingDeferredBlendLocationList: data.WorkingDeferredBlendLocationList,
193 |                 workingControlBuffer: data.WorkingControlBuffer,
194 |                 workingExecuteIndirectBuffer: data.WorkingExecuteIndirectBuffer);
195 | 
196 |             // 3) Shape candidate processing (indirect dispatch)
197 |             cmaa2.ProcessCandidatesCS(cmd: cmd,
198 |                 workingExecuteDirectBuffer: data.WorkingExecuteIndirectBuffer,
199 |                 inColor: data.ColorBackBuffer,
200 |                 workingEdges: data.WorkingEdges,
201 |                 workingDeferredBlendItemListHeads: data.WorkingDeferredBlendItemListHeads,
202 |                 workingControlBuffer: data.WorkingControlBuffer,
203 |                 workingDeferredBlendItemList: data.WorkingDeferredBlendItemList,
204 |                 workingShapeCandidates: data.WorkingShapeCandidates,
205 |                 workingDeferredBlendLocationList: data.WorkingDeferredBlendLocationList);
206 | 
207 |             // 4) Arguments for the second indirect dispatch
208 |             cmaa2.ComputeDispatchArgsCS(cmd: cmd,
209 |                 threadGroupsX: 1,
210 |                 threadGroupsY: 2,
211 |                 workingShapeCandidates: data.WorkingShapeCandidates,
212 |                 workingDeferredBlendLocationList: data.WorkingDeferredBlendLocationList,
213 |                 workingControlBuffer: data.WorkingControlBuffer,
214 |                 workingExecuteIndirectBuffer: data.WorkingExecuteIndirectBuffer);
215 | 
216 |             // 5) Resolve and apply the blended colors
217 |             cmaa2.DeferredColorApply2x2CS(cmd,
218 |                 workingExecuteIndirectBuffer: data.WorkingExecuteIndirectBuffer,
219 |                 outColor: data.ColorBackBuffer,
220 |                 workingControlBuffer: data.WorkingControlBuffer,
221 |                 workingDeferredBlendItemList: data.WorkingDeferredBlendItemList,
222 |                 workingDeferredBlendItemListHeads: data.WorkingDeferredBlendItemListHeads,
223 |                 workingDeferredBlendLocationList: data.WorkingDeferredBlendLocationList);
224 | 
225 |             // Copy the processed image back into the camera color target.
226 |             rawCmd.Blit(data.ColorBackBuffer, data.ActualFrameColor);
227 |         }
228 |     }
229 | 
230 |     /// <summary>Pairs a render-graph buffer handle with the element count it was created with.</summary>
231 |     public struct SizedBufferHandle
232 |     {
233 |         public readonly int Size;
234 |         public readonly BufferHandle Buffer;
235 |         public Vector4 Dimensions => new Vector4(Size, 0f, 0f, 0f); // element count in x, other components zero
236 | 
237 |         public SizedBufferHandle(BufferHandle bufferHandle, int size)
238 |         {
239 |             Size = size;
240 |             Buffer = bufferHandle;
241 |         }
242 |     }
243 | }
244 | 


--------------------------------------------------------------------------------
/Core/CMAA2RenderPass.cs.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: 12ba659d27ac490cb07534018ec02d43
3 | timeCreated: 1748371432


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2025 Alexander Malyutin
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/LICENSE.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: 98c46ce7a67ec410395698cf6180da62
3 | DefaultImporter:
4 |   externalObjects: {}
5 |   userData: 
6 |   assetBundleName: 
7 |   assetBundleVariant: 
8 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # CMAA2 for Unity URP
 2 | 
 3 | **Conservative Morphological Anti-Aliasing 2.0 (CMAA2)** ported to the Unity Universal Render Pipeline.
 4 | 
 5 | CMAA2 is a post-process anti-aliasing technique focused on delivering high-quality edge smoothing while preserving the sharpness of the original image.
 6 | 
 7 | Details of the original implementation and performance analysis are available in Intel’s article:  
 8 | https://www.intel.com/content/dam/develop/external/us/en/documents/conservative-morphological-anti-aliasing.pdf
 9 | 
10 | | CMAA Off               | CMAA On               |
11 | |------------------------|-----------------------|
12 | | ![cmaa-2-disabled-out] | ![cmaa-2-enabled-out] |
13 | 
14 | ## Installation
15 | 
16 | TODO:
17 | 
18 | ## Acknowledgements
19 | 
20 | This project includes a modified version of [`CMAA2.hlsl`](https://github.com/GameTechDev/CMAA2/blob/master/Projects/CMAA2/CMAA2/CMAA2.hlsl) from Intel’s [GameTechDev/CMAA2](https://github.com/GameTechDev/CMAA2) project.  
21 | The original code is licensed under the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0).  
22 | See [`THIRD_PARTY_LICENSES/CMAA2-LICENSE`](THIRD_PARTY_LICENSES/CMAA2-LICENSE) for details.
23 | 
24 | License
25 | -------
26 | This project is licensed under the MIT License; see the [LICENSE](LICENSE) file for details.
27 | 
28 | 
29 | [cmaa-2-disabled-out]: https://github.com/user-attachments/assets/68805e27-e569-4da8-86ff-60912f0709b0
30 | [cmaa-2-enabled-out]: https://github.com/user-attachments/assets/b41fb60c-af01-4f1f-83de-4f68b342e8cc
31 | 


--------------------------------------------------------------------------------
/README.md.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: 28fcd06bdc08e4882a16fef24fe44c35
3 | TextScriptImporter:
4 |   externalObjects: {}
5 |   userData: 
6 |   assetBundleName: 
7 |   assetBundleVariant: 
8 | 


--------------------------------------------------------------------------------
/THIRD_PARTY_LICENSES.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: c4eba531045348f3b58542f8aaf063a0
3 | timeCreated: 1748700773


--------------------------------------------------------------------------------
/THIRD_PARTY_LICENSES/CMAA2-LICENSE:
--------------------------------------------------------------------------------
  1 |  
  2 | Apache License
  3 |  Version 2.0, January 2004
  4 | 
  5 |  http://www.apache.org/licenses/ 
  6 | 
  7 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 
  8 | 
  9 | 1. Definitions.
 10 | 
 11 | "License" shall mean the terms and conditions for use, reproduction, and 
 12 | distribution as defined by Sections 1 through 9 of this document. 
 13 | 
 14 | "Licensor" shall mean the copyright owner or entity authorized by the copyright 
 15 | owner that is granting the License. 
 16 | 
 17 | "Legal Entity" shall mean the union of the acting entity and all other entities 
 18 | that control, are controlled by, or are under common control with that entity. 
 19 | For the purposes of this definition, "control" means (i) the power, direct or 
 20 | indirect, to cause the direction or management of such entity, whether by 
 21 | contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the 
 22 | outstanding shares, or (iii) beneficial ownership of such entity. 
 23 | 
 24 | "You" (or "Your") shall mean an individual or Legal Entity exercising 
 25 | permissions granted by this License. 
 26 | 
 27 | "Source" form shall mean the preferred form for making modifications, including 
 28 | but not limited to software source code, documentation source, and configuration 
 29 | files. 
 30 | 
 31 | "Object" form shall mean any form resulting from mechanical transformation or 
 32 | translation of a Source form, including but not limited to compiled object code, 
 33 | generated documentation, and conversions to other media types. 
 34 | 
 35 | "Work" shall mean the work of authorship, whether in Source or Object form, made 
 36 | available under the License, as indicated by a copyright notice that is included 
 37 | in or attached to the work (an example is provided in the Appendix below). 
 38 | 
 39 | "Derivative Works" shall mean any work, whether in Source or Object form, that 
 40 | is based on (or derived from) the Work and for which the editorial revisions, 
 41 | annotations, elaborations, or other modifications represent, as a whole, an 
 42 | original work of authorship. For the purposes of this License, Derivative Works 
 43 | shall not include works that remain separable from, or merely link (or bind by 
 44 | name) to the interfaces of, the Work and Derivative Works thereof. 
 45 | 
 46 | "Contribution" shall mean any work of authorship, including the original version 
 47 | of the Work and any modifications or additions to that Work or Derivative Works 
 48 | thereof, that is intentionally submitted to Licensor for inclusion in the Work 
 49 | by the copyright owner or by an individual or Legal Entity authorized to submit 
 50 | on behalf of the copyright owner. For the purposes of this definition, 
 51 | "submitted" means any form of electronic, verbal, or written communication sent 
 52 | to the Licensor or its representatives, including but not limited to 
 53 | communication on electronic mailing lists, source code control systems, and 
 54 | issue tracking systems that are managed by, or on behalf of, the Licensor for 
 55 | the purpose of discussing and improving the Work, but excluding communication 
 56 | that is conspicuously marked or otherwise designated in writing by the copyright 
 57 | owner as "Not a Contribution." 
 58 | 
 59 | "Contributor" shall mean Licensor and any individual or Legal Entity on behalf 
 60 | of whom a Contribution has been received by Licensor and subsequently 
 61 | incorporated within the Work. 
 62 | 
 63 | 2. Grant of Copyright License. Subject to the terms and conditions of this 
 64 | License, each Contributor hereby grants to You a perpetual, worldwide, 
 65 | non-exclusive, no-charge, royalty-free, irrevocable copyright license to 
 66 | reproduce, prepare Derivative Works of, publicly display, publicly perform, 
 67 | sublicense, and distribute the Work and such Derivative Works in Source or 
 68 | Object form. 
 69 | 
 70 | 3. Grant of Patent License. Subject to the terms and conditions of this License, 
 71 | each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, 
 72 | no-charge, royalty-free, irrevocable (except as stated in this section) patent 
 73 | license to make, have made, use, offer to sell, sell, import, and otherwise 
 74 | transfer the Work, where such license applies only to those patent claims 
 75 | licensable by such Contributor that are necessarily infringed by their 
 76 | Contribution(s) alone or by combination of their Contribution(s) with the Work 
 77 | to which such Contribution(s) was submitted. If You institute patent litigation 
 78 | against any entity (including a cross-claim or counterclaim in a lawsuit) 
 79 | alleging that the Work or a Contribution incorporated within the Work 
 80 | constitutes direct or contributory patent infringement, then any patent licenses 
 81 | granted to You under this License for that Work shall terminate as of the date 
 82 | such litigation is filed. 
 83 | 
 84 | 4. Redistribution. You may reproduce and distribute copies of the Work or 
 85 | Derivative Works thereof in any medium, with or without modifications, and in 
 86 | Source or Object form, provided that You meet the following conditions: 
 87 |   You must give any other recipients of the Work or Derivative Works a copy of 
 88 |   this License; and 
 89 | 
 90 | 
 91 |   You must cause any modified files to carry prominent notices stating that You 
 92 |   changed the files; and 
 93 | 
 94 | 
 95 |   You must retain, in the Source form of any Derivative Works that You 
 96 |   distribute, all copyright, patent, trademark, and attribution notices from the 
 97 |   Source form of the Work, excluding those notices that do not pertain to any 
 98 |   part of the Derivative Works; and 
 99 | 
100 | 
101 |   If the Work includes a "NOTICE" text file as part of its distribution, then 
102 |   any Derivative Works that You distribute must include a readable copy of the 
103 |   attribution notices contained within such NOTICE file, excluding those notices 
104 |   that do not pertain to any part of the Derivative Works, in at least one of 
105 |   the following places: within a NOTICE text file distributed as part of the 
106 |   Derivative Works; within the Source form or documentation, if provided along 
107 |   with the Derivative Works; or, within a display generated by the Derivative 
108 |   Works, if and wherever such third-party notices normally appear. The contents 
109 |   of the NOTICE file are for informational purposes only and do not modify the 
110 |   License. You may add Your own attribution notices within Derivative Works that 
111 |   You distribute, alongside or as an addendum to the NOTICE text from the Work, 
112 |   provided that such additional attribution notices cannot be construed as 
113 |   modifying the License.
114 | You may add Your own copyright statement to Your modifications and may provide 
115 | additional or different license terms and conditions for use, reproduction, or 
116 | distribution of Your modifications, or for any such Derivative Works as a whole, 
117 | provided Your use, reproduction, and distribution of the Work otherwise complies 
118 | with the conditions stated in this License. 
119 | 
120 | 5. Submission of Contributions. Unless You explicitly state otherwise, any 
121 | Contribution intentionally submitted for inclusion in the Work by You to the 
122 | Licensor shall be under the terms and conditions of this License, without any 
123 | additional terms or conditions. Notwithstanding the above, nothing herein shall 
124 | supersede or modify the terms of any separate license agreement you may have 
125 | executed with Licensor regarding such Contributions. 
126 | 
127 | 6. Trademarks. This License does not grant permission to use the trade names, 
128 | trademarks, service marks, or product names of the Licensor, except as required 
129 | for reasonable and customary use in describing the origin of the Work and 
130 | reproducing the content of the NOTICE file. 
131 | 
132 | 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in 
133 | writing, Licensor provides the Work (and each Contributor provides its 
134 | Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 
135 | KIND, either express or implied, including, without limitation, any warranties 
136 | or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 
137 | PARTICULAR PURPOSE. You are solely responsible for determining the 
138 | appropriateness of using or redistributing the Work and assume any risks 
139 | associated with Your exercise of permissions under this License. 
140 | 
141 | 8. Limitation of Liability. In no event and under no legal theory, whether in 
142 | tort (including negligence), contract, or otherwise, unless required by 
143 | applicable law (such as deliberate and grossly negligent acts) or agreed to in 
144 | writing, shall any Contributor be liable to You for damages, including any 
145 | direct, indirect, special, incidental, or consequential damages of any character 
146 | arising as a result of this License or out of the use or inability to use the 
147 | Work (including but not limited to damages for loss of goodwill, work stoppage, 
148 | computer failure or malfunction, or any and all other commercial damages or 
149 | losses), even if such Contributor has been advised of the possibility of such 
150 | damages. 
151 | 
152 | 9. Accepting Warranty or Additional Liability. While redistributing the Work or 
153 | Derivative Works thereof, You may choose to offer, and charge a fee for, 
154 | acceptance of support, warranty, indemnity, or other liability obligations 
155 | and/or rights consistent with this License. However, in accepting such 
156 | obligations, You may act only on Your own behalf and on Your sole 
157 | responsibility, not on behalf of any other Contributor, and only if You agree to 
158 | indemnify, defend, and hold each Contributor harmless for any liability incurred 
159 | by, or claims asserted against, such Contributor by reason of your accepting any 
160 | such warranty or additional liability. 
161 | 
162 | END OF TERMS AND CONDITIONS 
163 | 
164 | APPENDIX: How to apply the Apache License to your work 
165 | 
166 | To apply the Apache License to your work, attach the following boilerplate 
167 | notice, with the fields enclosed by brackets "[]" replaced with your own 
168 | identifying information. (Don't include the brackets!) The text should be 
169 | enclosed in the appropriate comment syntax for the file format. We also 
170 | recommend that a file or class name and description of purpose be included on 
171 | the same "printed page" as the copyright notice for easier identification within 
172 | third-party archives. 
173 | 
174 | Copyright [yyyy] [name of copyright owner]
175 | 
176 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use 
177 | this file except in compliance with the License. You may obtain a copy of the 
178 | License at 
179 | 
180 | http://www.apache.org/licenses/LICENSE-2.0 
181 | 
182 | Unless required by applicable law or agreed to in writing, software distributed 
183 | under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
184 | CONDITIONS OF ANY KIND, either express or implied. See the License for the 
185 | specific language governing permissions and limitations under the License.


--------------------------------------------------------------------------------
/THIRD_PARTY_LICENSES/CMAA2-LICENSE.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: cd23281e4ddd46c7939ee7c2e6b240ea
3 | timeCreated: 1748700785


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "com.alexmalyutindev.cmaa2",
3 |   "version": "1.0.0",
4 |   "displayName": "CMAA2",
5 |   "dependencies": {
6 |     "com.unity.render-pipelines.universal": "17.1.0"
7 |   }
8 | }


--------------------------------------------------------------------------------
/package.json.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: cdd8eac26f90743089384df9517aa03f
3 | PackageManifestImporter:
4 |   externalObjects: {}
5 |   userData: 
6 |   assetBundleName: 
7 |   assetBundleVariant: 
8 | 


--------------------------------------------------------------------------------