// STP setup pass compute shader.
//
// Declares the StpSetup kernel's shader variants and resource bindings, the 16-lane ("4x4") max/sum
// reduction helpers, and the StpPat* load/sample/store helpers for the 16-bit and 32-bit code paths.
// The StpPat* helpers are presumably consumed by the StpPatH/StpPatF routines that the kernel calls;
// those routines are not defined in this file (confirm in StpCommon.hlsl).
#pragma kernel StpSetup

#pragma multi_compile _ ENABLE_DEBUG_MODE
#pragma multi_compile _ ENABLE_STENCIL_RESPONSIVE
#pragma multi_compile _ ENABLE_LARGE_KERNEL

#pragma multi_compile _ UNITY_DEVICE_SUPPORTS_NATIVE_16BIT

// TODO: Re-enable support for wave reductions (usage of UNITY_DEVICE_SUPPORTS_WAVE_ANY keyword)
//
// We've run into many platform specific problems when trying to use wave operations for STP's reductions so they're being
// disabled for now. Enabling support for wave operations also causes us to use DXC on the 32-bit path on some Qualcomm Android
// devices and this triggers visual artifacts that we have no other way to work around at the moment.

#pragma multi_compile _ DISABLE_TEXTURE2D_X_ARRAY

#pragma only_renderers d3d11 playstation xboxone xboxseries vulkan metal switch

#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Color.hlsl"
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/UnityInstancing.hlsl"

// STP_PAT is defined before StpCommon.hlsl is included; presumably it selects the part of that header
// used by this pass (confirm in StpCommon.hlsl).
#define STP_PAT 1
#include "Packages/com.unity.render-pipelines.core/Runtime/STP/StpCommon.hlsl"

//
// Input
//

TEXTURE2D_X(_StpInputColor);
TEXTURE2D_X(_StpInputDepth);
TEXTURE2D_X(_StpInputMotion);
// The stencil input is only declared when the ENABLE_STENCIL_RESPONSIVE keyword is defined.
#if defined(ENABLE_STENCIL_RESPONSIVE)
    TYPED_TEXTURE2D_X(uint2, _StpInputStencil);
#endif

//
// Intermediate Output
//

RW_TEXTURE2D_X(float4, _StpIntermediateColor);
RW_TEXTURE2D_X(float, _StpIntermediateConvergence);

//
// History Input/Output
//

TYPED_TEXTURE2D_X(uint, _StpPriorDepthMotion);
RW_TEXTURE2D_X(uint, _StpDepthMotion);

TEXTURE2D_X(_StpPriorLuma);
RW_TEXTURE2D_X(float2, _StpLuma);

TEXTURE2D_X(_StpPriorConvergence);

TEXTURE2D_X(_StpPriorFeedback);

// Base index into _StpSetupPerViewConstants for the current slice; the StpSetup kernel adds 0..7 to it.
#define STP_SETUP_PER_VIEW_CONSTANTS_STEREO_OFFSET (SLICE_ARRAY_INDEX * STPSETUPPERVIEWCONSTANTS_COUNT)

#if defined(SHADER_API_PSSL) || defined(SHADER_API_SWITCH) || (defined(SHADER_API_METAL) && !defined(SHADER_API_MOBILE))
    // Force usage of the 32-bit reduction path even in 16-bit environments
    #define STP_FORCE_32BIT_REDUCTION
#endif

#if defined(SHADER_API_PSSL) || defined(SHADER_API_GAMECORE) || defined(SHADER_API_METAL) || (defined(SHADER_API_VULKAN) && defined(SHADER_API_MOBILE))
    // Force usage of group shared memory instead of using wave operations
    #define STP_FORCE_GROUPSHARED
#endif

// Enable the use of wave operations when they're supported by the current hardware and usage of groupshared hasn't been forced.
#if defined(UNITY_HW_SUPPORTS_WAVE) && !defined(STP_FORCE_GROUPSHARED)
    #define STP_ENABLE_WAVEOPS
#endif

// STP requires a 4x4 reduction which must be implemented by either wave operations, or group shared memory.
//
// The scratch buffer only exists on the group shared path. The packed 16-bit reduction stores one uint4
// per thread; the 32-bit reduction stores float4 values and reserves STP_GROUP_SIZE * 2 entries so that
// StpPat4x4MaxF8 can keep `a` in the first half and `b` in the second half.
#if !defined(STP_ENABLE_WAVEOPS)
    #if defined(STP_16BIT) && !defined(STP_FORCE_32BIT_REDUCTION)
        groupshared uint4 gs_StpScratch[STP_GROUP_SIZE];
    #else
        groupshared float4 gs_StpScratch[STP_GROUP_SIZE * 2];
    #endif
#endif

// In some cases, we have to expose the 32-bit reduction code in the 16-bit path
#if defined(STP_32BIT) || defined(STP_FORCE_32BIT_REDUCTION)
// Component-wise maximum of `a` and of `b` over each aligned run of 16 lanes.
//
//   i    - lane index of the calling thread. Only the group shared path reads it (to address
//          gs_StpScratch); the wave path uses WaveGetLaneIndex() instead.
//   a, b - in: this lane's values. out: per-component maximum over the 16 lanes whose index differs
//          from this lane's only in the low four bits.
//
// NOTE(review): the "2x2"/"4x4" naming implies that 4 and 16 consecutive lanes cover 2x2 and 4x4 pixel
// footprints; that depends on the lane remap applied by the kernel, which is not defined in this file.
void StpPat4x4MaxF8(StpMU1 i, inout StpF4 a, inout StpF4 b)
{
#if defined(STP_ENABLE_WAVEOPS)
    // Butterfly exchange: each step combines this lane with the lane whose index differs in one bit
    // (1, 2, 4, then 8), so after the four steps all 16 lanes hold the same result.
    // Values go through StpU1_F1 / StpF1_U1 around the lane read - presumably a float<->uint bit cast;
    // confirm in StpCommon.hlsl.
    a.x = max(a.x, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.x), WaveGetLaneIndex() ^ 1)));
    a.y = max(a.y, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.y), WaveGetLaneIndex() ^ 1)));
    a.z = max(a.z, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.z), WaveGetLaneIndex() ^ 1)));
    a.w = max(a.w, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.w), WaveGetLaneIndex() ^ 1)));
    b.x = max(b.x, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.x), WaveGetLaneIndex() ^ 1)));
    b.y = max(b.y, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.y), WaveGetLaneIndex() ^ 1)));
    b.z = max(b.z, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.z), WaveGetLaneIndex() ^ 1)));
    b.w = max(b.w, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.w), WaveGetLaneIndex() ^ 1)));

    a.x = max(a.x, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.x), WaveGetLaneIndex() ^ 2)));
    a.y = max(a.y, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.y), WaveGetLaneIndex() ^ 2)));
    a.z = max(a.z, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.z), WaveGetLaneIndex() ^ 2)));
    a.w = max(a.w, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.w), WaveGetLaneIndex() ^ 2)));
    b.x = max(b.x, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.x), WaveGetLaneIndex() ^ 2)));
    b.y = max(b.y, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.y), WaveGetLaneIndex() ^ 2)));
    b.z = max(b.z, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.z), WaveGetLaneIndex() ^ 2)));
    b.w = max(b.w, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.w), WaveGetLaneIndex() ^ 2)));

    a.x = max(a.x, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.x), WaveGetLaneIndex() ^ 4)));
    a.y = max(a.y, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.y), WaveGetLaneIndex() ^ 4)));
    a.z = max(a.z, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.z), WaveGetLaneIndex() ^ 4)));
    a.w = max(a.w, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.w), WaveGetLaneIndex() ^ 4)));
    b.x = max(b.x, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.x), WaveGetLaneIndex() ^ 4)));
    b.y = max(b.y, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.y), WaveGetLaneIndex() ^ 4)));
    b.z = max(b.z, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.z), WaveGetLaneIndex() ^ 4)));
    b.w = max(b.w, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.w), WaveGetLaneIndex() ^ 4)));

    a.x = max(a.x, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.x), WaveGetLaneIndex() ^ 8)));
    a.y = max(a.y, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.y), WaveGetLaneIndex() ^ 8)));
    a.z = max(a.z, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.z), WaveGetLaneIndex() ^ 8)));
    a.w = max(a.w, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.w), WaveGetLaneIndex() ^ 8)));
    b.x = max(b.x, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.x), WaveGetLaneIndex() ^ 8)));
    b.y = max(b.y, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.y), WaveGetLaneIndex() ^ 8)));
    b.z = max(b.z, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.z), WaveGetLaneIndex() ^ 8)));
    b.w = max(b.w, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.w), WaveGetLaneIndex() ^ 8)));
#else
    // Publish this lane's values: `a` in the first half of the scratch buffer, `b` in the second half.
    gs_StpScratch[i] = a;
    gs_StpScratch[i + STP_GROUP_SIZE] = b;

    GroupMemoryBarrierWithGroupSync();

    // 2x2 Reduction
    {
        // First lane of this lane's aligned run of 4; a0..a2 are the other three lanes of that run.
        StpMU1 offset = (i & ~StpMU1(3));

        StpMU1 a0 = offset + ((i + StpMU1(1)) & StpMU1(3));
        StpMU1 a1 = offset + ((i + StpMU1(2)) & StpMU1(3));
        StpMU1 a2 = offset + ((i + StpMU1(3)) & StpMU1(3));

        float4 x0 = gs_StpScratch[a0];
        float4 x1 = gs_StpScratch[a1];
        float4 x2 = gs_StpScratch[a2];

        float4 y0 = gs_StpScratch[a0 + STP_GROUP_SIZE];
        float4 y1 = gs_StpScratch[a1 + STP_GROUP_SIZE];
        float4 y2 = gs_StpScratch[a2 + STP_GROUP_SIZE];

        // Every lane must finish reading before any lane overwrites the scratch buffer below.
        GroupMemoryBarrierWithGroupSync();

        a = max(max(max(a, x0), x1), x2);
        b = max(max(max(b, y0), y1), y2);
    }

    // Publish the per-run-of-4 results for the second stage.
    gs_StpScratch[i] = a;
    gs_StpScratch[i + STP_GROUP_SIZE] = b;

    GroupMemoryBarrierWithGroupSync();

    // 4x4 Reduction
    {
        // First lane of this lane's aligned run of 16; a0..a2 are the lanes 4, 8 and 12 positions
        // further on (wrapping inside the run), i.e. one lane from each of the other three runs of 4.
        StpMU1 offset = (i & ~StpMU1(15));

        StpMU1 a0 = offset + ((i + StpMU1(4)) & StpMU1(15));
        StpMU1 a1 = offset + ((i + StpMU1(8)) & StpMU1(15));
        StpMU1 a2 = offset + ((i + StpMU1(12)) & StpMU1(15));

        float4 x0 = gs_StpScratch[a0];
        float4 x1 = gs_StpScratch[a1];
        float4 x2 = gs_StpScratch[a2];

        float4 y0 = gs_StpScratch[a0 + STP_GROUP_SIZE];
        float4 y1 = gs_StpScratch[a1 + STP_GROUP_SIZE];
        float4 y2 = gs_StpScratch[a2 + STP_GROUP_SIZE];

        // Keeps the scratch buffer intact until every lane has read it (a later reduction call
        // writes to it again).
        GroupMemoryBarrierWithGroupSync();

        a = max(max(max(a, x0), x1), x2);
        b = max(max(max(b, y0), y1), y2);
    }
#endif
}

// Component-wise sum of `a` over each aligned run of 16 lanes.
//
//   i - lane index of the calling thread (read by the group shared path only).
//   a - in: this lane's value. out: the sum over the 16 lanes whose index differs from this lane's only
//       in the low four bits.
//
// Same two-stage structure as StpPat4x4MaxF8, with addition instead of max and a single operand, so
// only the first STP_GROUP_SIZE scratch entries are used on the group shared path.
void StpPat4x4SumF4(StpMU1 i, inout StpF4 a)
{
#if defined(STP_ENABLE_WAVEOPS)
    // Butterfly exchange over lane-index bits 1, 2, 4 and 8; each step doubles the number of lanes
    // that contributed to the running sum.
    a.x += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.x), WaveGetLaneIndex() ^ 1));
    a.y += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.y), WaveGetLaneIndex() ^ 1));
    a.z += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.z), WaveGetLaneIndex() ^ 1));
    a.w += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.w), WaveGetLaneIndex() ^ 1));

    a.x += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.x), WaveGetLaneIndex() ^ 2));
    a.y += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.y), WaveGetLaneIndex() ^ 2));
    a.z += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.z), WaveGetLaneIndex() ^ 2));
    a.w += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.w), WaveGetLaneIndex() ^ 2));

    a.x += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.x), WaveGetLaneIndex() ^ 4));
    a.y += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.y), WaveGetLaneIndex() ^ 4));
    a.z += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.z), WaveGetLaneIndex() ^ 4));
    a.w += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.w), WaveGetLaneIndex() ^ 4));

    a.x += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.x), WaveGetLaneIndex() ^ 8));
    a.y += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.y), WaveGetLaneIndex() ^ 8));
    a.z += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.z), WaveGetLaneIndex() ^ 8));
    a.w += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.w), WaveGetLaneIndex() ^ 8));
#else
    gs_StpScratch[i] = a;

    GroupMemoryBarrierWithGroupSync();

    // 2x2 Reduction
    {
        // Other three lanes of this lane's aligned run of 4.
        StpMU1 offset = (i & ~StpMU1(3));

        StpMU1 a0 = offset + ((i + StpMU1(1)) & StpMU1(3));
        StpMU1 a1 = offset + ((i + StpMU1(2)) & StpMU1(3));
        StpMU1 a2 = offset + ((i + StpMU1(3)) & StpMU1(3));

        float4 x0 = gs_StpScratch[a0];
        float4 x1 = gs_StpScratch[a1];
        float4 x2 = gs_StpScratch[a2];

        // All reads must complete before the scratch buffer is overwritten below.
        GroupMemoryBarrierWithGroupSync();

        a = a + x0 + x1 + x2;
    }

    gs_StpScratch[i] = a;

    GroupMemoryBarrierWithGroupSync();

    // 4x4 Reduction
    {
        // One lane from each of the other three runs of 4 inside this lane's aligned run of 16.
        StpMU1 offset = (i & ~StpMU1(15));

        StpMU1 a0 = offset + ((i + StpMU1(4)) & StpMU1(15));
        StpMU1 a1 = offset + ((i + StpMU1(8)) & StpMU1(15));
        StpMU1 a2 = offset + ((i + StpMU1(12)) & StpMU1(15));

        float4 x0 = gs_StpScratch[a0];
        float4 x1 = gs_StpScratch[a1];
        float4 x2 = gs_StpScratch[a2];

        // Keeps the scratch buffer intact until every lane has read it.
        GroupMemoryBarrierWithGroupSync();

        a = a + x0 + x1 + x2;
    }
#endif
}
#endif

#if defined(STP_16BIT)
// 16-bit counterpart of StpPat4x4MaxF8: component-wise maximum of `a` and of `b` over each aligned run
// of 16 lanes.
//
// With STP_FORCE_32BIT_REDUCTION defined the call is forwarded to StpPat4x4MaxF8 with the same
// arguments. Otherwise each pair of components is passed through StpU1_H2 so that it travels as one
// uint (presumably two 16-bit values packed into 32 bits; confirm in StpCommon.hlsl), which lets a
// single uint4 scratch entry carry both `a` and `b`.
void StpPat4x4MaxH8(StpW1 i, inout StpH4 a, inout StpH4 b)
{
#if defined(STP_FORCE_32BIT_REDUCTION)
    StpPat4x4MaxF8(i, a, b);
#else
    #if defined(STP_ENABLE_WAVEOPS)
    // Butterfly exchange over lane-index bits 1, 2, 4 and 8, two components per lane read.
    a.xy = max(a.xy, StpH2_U1(WaveReadLaneAt(StpU1_H2(a.xy), WaveGetLaneIndex() ^ 1)));
    a.zw = max(a.zw, StpH2_U1(WaveReadLaneAt(StpU1_H2(a.zw), WaveGetLaneIndex() ^ 1)));
    b.xy = max(b.xy, StpH2_U1(WaveReadLaneAt(StpU1_H2(b.xy), WaveGetLaneIndex() ^ 1)));
    b.zw = max(b.zw, StpH2_U1(WaveReadLaneAt(StpU1_H2(b.zw), WaveGetLaneIndex() ^ 1)));

    a.xy = max(a.xy, StpH2_U1(WaveReadLaneAt(StpU1_H2(a.xy), WaveGetLaneIndex() ^ 2)));
    a.zw = max(a.zw, StpH2_U1(WaveReadLaneAt(StpU1_H2(a.zw), WaveGetLaneIndex() ^ 2)));
    b.xy = max(b.xy, StpH2_U1(WaveReadLaneAt(StpU1_H2(b.xy), WaveGetLaneIndex() ^ 2)));
    b.zw = max(b.zw, StpH2_U1(WaveReadLaneAt(StpU1_H2(b.zw), WaveGetLaneIndex() ^ 2)));

    a.xy = max(a.xy, StpH2_U1(WaveReadLaneAt(StpU1_H2(a.xy), WaveGetLaneIndex() ^ 4)));
    a.zw = max(a.zw, StpH2_U1(WaveReadLaneAt(StpU1_H2(a.zw), WaveGetLaneIndex() ^ 4)));
    b.xy = max(b.xy, StpH2_U1(WaveReadLaneAt(StpU1_H2(b.xy), WaveGetLaneIndex() ^ 4)));
    b.zw = max(b.zw, StpH2_U1(WaveReadLaneAt(StpU1_H2(b.zw), WaveGetLaneIndex() ^ 4)));

    a.xy = max(a.xy, StpH2_U1(WaveReadLaneAt(StpU1_H2(a.xy), WaveGetLaneIndex() ^ 8)));
    a.zw = max(a.zw, StpH2_U1(WaveReadLaneAt(StpU1_H2(a.zw), WaveGetLaneIndex() ^ 8)));
    b.xy = max(b.xy, StpH2_U1(WaveReadLaneAt(StpU1_H2(b.xy), WaveGetLaneIndex() ^ 8)));
    b.zw = max(b.zw, StpH2_U1(WaveReadLaneAt(StpU1_H2(b.zw), WaveGetLaneIndex() ^ 8)));
    #else
    // Scratch layout per lane: x = a.xy, y = a.zw, z = b.xy, w = b.zw.
    gs_StpScratch[i] = StpU4(StpU1_H2(a.xy), StpU1_H2(a.zw), StpU1_H2(b.xy), StpU1_H2(b.zw));

    GroupMemoryBarrierWithGroupSync();

    // 2x2 Reduction
    {
        // Other three lanes of this lane's aligned run of 4.
        StpW1 offset = (i & ~StpW1(3));

        StpW1 a0 = offset + ((i + StpW1(1)) & StpW1(3));
        StpW1 a1 = offset + ((i + StpW1(2)) & StpW1(3));
        StpW1 a2 = offset + ((i + StpW1(3)) & StpW1(3));

        uint4 x0 = gs_StpScratch[a0];
        uint4 x1 = gs_StpScratch[a1];
        uint4 x2 = gs_StpScratch[a2];

        // All reads must complete before the scratch buffer is overwritten below.
        GroupMemoryBarrierWithGroupSync();

        a.xy = max(max(max(a.xy, StpH2_U1(x0.x)), StpH2_U1(x1.x)), StpH2_U1(x2.x));
        a.zw = max(max(max(a.zw, StpH2_U1(x0.y)), StpH2_U1(x1.y)), StpH2_U1(x2.y));
        b.xy = max(max(max(b.xy, StpH2_U1(x0.z)), StpH2_U1(x1.z)), StpH2_U1(x2.z));
        b.zw = max(max(max(b.zw, StpH2_U1(x0.w)), StpH2_U1(x1.w)), StpH2_U1(x2.w));
    }

    gs_StpScratch[i] = StpU4(StpU1_H2(a.xy), StpU1_H2(a.zw), StpU1_H2(b.xy), StpU1_H2(b.zw));

    GroupMemoryBarrierWithGroupSync();

    // 4x4 Reduction
    {
        // One lane from each of the other three runs of 4 inside this lane's aligned run of 16.
        StpW1 offset = (i & ~StpW1(15));

        StpW1 a0 = offset + ((i + StpW1(4)) & StpW1(15));
        StpW1 a1 = offset + ((i + StpW1(8)) & StpW1(15));
        StpW1 a2 = offset + ((i + StpW1(12)) & StpW1(15));

        uint4 x0 = gs_StpScratch[a0];
        uint4 x1 = gs_StpScratch[a1];
        uint4 x2 = gs_StpScratch[a2];

        // Keeps the scratch buffer intact until every lane has read it.
        GroupMemoryBarrierWithGroupSync();

        a.xy = max(max(max(a.xy, StpH2_U1(x0.x)), StpH2_U1(x1.x)), StpH2_U1(x2.x));
        a.zw = max(max(max(a.zw, StpH2_U1(x0.y)), StpH2_U1(x1.y)), StpH2_U1(x2.y));
        b.xy = max(max(max(b.xy, StpH2_U1(x0.z)), StpH2_U1(x1.z)), StpH2_U1(x2.z));
        b.zw = max(max(max(b.zw, StpH2_U1(x0.w)), StpH2_U1(x1.w)), StpH2_U1(x2.w));
    }
    #endif
#endif
}

// 16-bit counterpart of StpPat4x4SumF4: component-wise sum of `a` over each aligned run of 16 lanes.
//
// With STP_FORCE_32BIT_REDUCTION defined the call is forwarded to StpPat4x4SumF4 with the same
// arguments. Otherwise the group shared path uses only the .xy half of each uint4 scratch entry
// (x = a.xy, y = a.zw, each passed through StpU1_H2).
void StpPat4x4SumH4(StpW1 i, inout StpH4 a)
{
#if defined(STP_FORCE_32BIT_REDUCTION)
    StpPat4x4SumF4(i, a);
#else
    #if defined(STP_ENABLE_WAVEOPS)
    // Butterfly exchange over lane-index bits 1, 2, 4 and 8, two components per lane read.
    a.xy += StpH2_U1(WaveReadLaneAt(StpU1_H2(a.xy), WaveGetLaneIndex() ^ 1));
    a.zw += StpH2_U1(WaveReadLaneAt(StpU1_H2(a.zw), WaveGetLaneIndex() ^ 1));

    a.xy += StpH2_U1(WaveReadLaneAt(StpU1_H2(a.xy), WaveGetLaneIndex() ^ 2));
    a.zw += StpH2_U1(WaveReadLaneAt(StpU1_H2(a.zw), WaveGetLaneIndex() ^ 2));

    a.xy += StpH2_U1(WaveReadLaneAt(StpU1_H2(a.xy), WaveGetLaneIndex() ^ 4));
    a.zw += StpH2_U1(WaveReadLaneAt(StpU1_H2(a.zw), WaveGetLaneIndex() ^ 4));

    a.xy += StpH2_U1(WaveReadLaneAt(StpU1_H2(a.xy), WaveGetLaneIndex() ^ 8));
    a.zw += StpH2_U1(WaveReadLaneAt(StpU1_H2(a.zw), WaveGetLaneIndex() ^ 8));
    #else
    gs_StpScratch[i].xy = StpU2(StpU1_H2(a.xy), StpU1_H2(a.zw));

    GroupMemoryBarrierWithGroupSync();

    // 2x2 Reduction
    {
        // Other three lanes of this lane's aligned run of 4.
        StpW1 offset = (i & ~StpW1(3));

        StpW1 a0 = offset + ((i + StpW1(1)) & StpW1(3));
        StpW1 a1 = offset + ((i + StpW1(2)) & StpW1(3));
        StpW1 a2 = offset + ((i + StpW1(3)) & StpW1(3));

        uint2 x0 = gs_StpScratch[a0].xy;
        uint2 x1 = gs_StpScratch[a1].xy;
        uint2 x2 = gs_StpScratch[a2].xy;

        // All reads must complete before the scratch buffer is overwritten below.
        GroupMemoryBarrierWithGroupSync();

        a.xy = a.xy + StpH2_U1(x0.x) + StpH2_U1(x1.x) + StpH2_U1(x2.x);
        a.zw = a.zw + StpH2_U1(x0.y) + StpH2_U1(x1.y) + StpH2_U1(x2.y);
    }

    gs_StpScratch[i].xy = StpU2(StpU1_H2(a.xy), StpU1_H2(a.zw));

    GroupMemoryBarrierWithGroupSync();

    // 4x4 Reduction
    {
        // One lane from each of the other three runs of 4 inside this lane's aligned run of 16.
        StpW1 offset = (i & ~StpW1(15));

        StpW1 a0 = offset + ((i + StpW1(4)) & StpW1(15));
        StpW1 a1 = offset + ((i + StpW1(8)) & StpW1(15));
        StpW1 a2 = offset + ((i + StpW1(12)) & StpW1(15));

        uint2 x0 = gs_StpScratch[a0].xy;
        uint2 x1 = gs_StpScratch[a1].xy;
        uint2 x2 = gs_StpScratch[a2].xy;

        // Keeps the scratch buffer intact until every lane has read it.
        GroupMemoryBarrierWithGroupSync();

        a.xy = a.xy + StpH2_U1(x0.x) + StpH2_U1(x1.x) + StpH2_U1(x2.x);
        a.zw = a.zw + StpH2_U1(x0.y) + StpH2_U1(x1.y) + StpH2_U1(x2.y);
    }
    #endif
#endif
}

// Samples _StpPriorConvergence at `p` (LOD 0, s_linear_clamp_sampler) and casts the result to StpH1.
StpH1 StpPatPriConH(StpF2 p) { return (StpH1)SAMPLE_TEXTURE2D_X_LOD(_StpPriorConvergence, s_linear_clamp_sampler, p, 0); }

// These are separate to support inline operation (pass merged instead of loads).
// Loads the .xy of _StpInputMotion at texel `o` (LOD 0).
StpF2 StpPatDatMotH(StpW2 o) { return LOAD_TEXTURE2D_X_LOD(_StpInputMotion, o, 0).xy; }

// Loads the .rgb of _StpInputColor at texel `o` (LOD 0), cast to StpH3.
StpH3 StpPatDatColH(StpW2 o) { return (StpH3)LOAD_TEXTURE2D_X_LOD(_StpInputColor, o, 0).rgb; }

// Loads the .x of _StpInputDepth at texel `o` (LOD 0).
StpF1 StpPatDatZH(StpW2 o) { return LOAD_TEXTURE2D_X_LOD(_StpInputDepth, o, 0).x; }

// This provides a place to convert Z from depth to linear if not inlined and actually loaded.
// Computes 1.0 / (STP_ZBUFFER_PARAMS_Z * z + STP_ZBUFFER_PARAMS_W).
StpF1 StpPatFixZH(StpF1 z) { return 1.0 / (STP_ZBUFFER_PARAMS_Z * z + STP_ZBUFFER_PARAMS_W); }

// With ENABLE_STENCIL_RESPONSIVE defined, returns GetStencilValue applied to the .xy of _StpInputStencil
// at texel `o`; the trailing return is then unreachable. Without the keyword, returns 0.
StpU1 StpPatDatRH(StpW2 o)
{
#if defined(ENABLE_STENCIL_RESPONSIVE)
    return GetStencilValue(LOAD_TEXTURE2D_X_LOD(_StpInputStencil, o, 0).xy);
#endif // defined(ENABLE_STENCIL_RESPONSIVE)
    return StpU1_(0);
}

// Returns 1.0 when DecodeHasValidHistory reports valid history and, with ENABLE_STENCIL_RESPONSIVE
// defined, `v` shares no bits with DecodeStencilMask; returns 0.0 otherwise.
// NOTE(review): how the caller interprets 0.0 vs 1.0 is not visible in this file.
StpH1 StpPatFixRH(StpU1 v)
{
    // Activate the "responsive" feature when we don't have valid history textures.
    bool hasValidHistory = DecodeHasValidHistory(STP_COMMON_CONSTANT);

    bool excludeTaa = false;
#if defined(ENABLE_STENCIL_RESPONSIVE)
    excludeTaa = (v & DecodeStencilMask(STP_COMMON_CONSTANT)) != 0;
#endif // defined(ENABLE_STENCIL_RESPONSIVE)

    return (hasValidHistory && !excludeTaa) ? StpH1_(1.0) : StpH1_(0.0);
}

// Forwards texel position `o` to StpDitH1 and returns its result.
StpH1 StpPatDitH(StpW2 o) { return StpDitH1(o); }

// Samples _StpPriorFeedback at `p` (LOD 0, s_linear_clamp_sampler), cast to StpH4.
StpH4 StpPatPriFedH(StpF2 p) { return (StpH4)SAMPLE_TEXTURE2D_X_LOD(_StpPriorFeedback, s_linear_clamp_sampler, p, 0); }

// Red / green / blue channel gathers of _StpPriorFeedback at `p` (s_point_clamp_sampler), cast to StpH4.
StpH4 StpPatPriFedR4H(StpF2 p) { return (StpH4)GATHER_RED_TEXTURE2D_X(_StpPriorFeedback, s_point_clamp_sampler, p); }
StpH4 StpPatPriFedG4H(StpF2 p) { return (StpH4)GATHER_GREEN_TEXTURE2D_X(_StpPriorFeedback, s_point_clamp_sampler, p); }
StpH4 StpPatPriFedB4H(StpF2 p) { return (StpH4)GATHER_BLUE_TEXTURE2D_X(_StpPriorFeedback, s_point_clamp_sampler, p); }

// Samples _StpPriorLuma at `p` (LOD 0, s_linear_clamp_sampler), cast to StpH2.
StpH2 StpPatPriLumH(StpF2 p) { return (StpH2)SAMPLE_TEXTURE2D_X_LOD(_StpPriorLuma, s_linear_clamp_sampler, p, 0); }

// Red channel gather of _StpPriorDepthMotion at `p` (s_point_clamp_sampler).
StpU4 StpPatPriMot4H(StpF2 p) { return GATHER_RED_TEXTURE2D_X(_StpPriorDepthMotion, s_point_clamp_sampler, p); }

// Stores: write `v` at texel `p` of _StpDepthMotion, _StpIntermediateColor and _StpLuma respectively.
void StpPatStMotH(StpW2 p, StpU1 v) { _StpDepthMotion[COORD_TEXTURE2D_X(p)] = v; }
void StpPatStColH(StpW2 p, StpH4 v) { _StpIntermediateColor[COORD_TEXTURE2D_X(p)] = v; }
void StpPatStLumH(StpW2 p, StpH2 v) { _StpLuma[COORD_TEXTURE2D_X(p)] = v; }

// Writes `v` to _StpIntermediateConvergence at `p` shifted right by 2 (p / 4 on each axis).
void StpPatStCnvH(StpW2 p, StpH1 v) { _StpIntermediateConvergence[COORD_TEXTURE2D_X(p >> StpW1(2))] = v; }
#endif

#if defined(STP_32BIT)
// Samples _StpPriorConvergence at `p` (LOD 0, s_linear_clamp_sampler) and casts the result to StpMF1.
StpMF1 StpPatPriConF(StpF2 p) { return (StpMF1)SAMPLE_TEXTURE2D_X_LOD(_StpPriorConvergence, s_linear_clamp_sampler, p, 0); }

// These are separate to support inline operation (pass merged instead of loads).
// Loads the .xy of _StpInputMotion at texel `o` (LOD 0).
StpF2 StpPatDatMotF(StpMU2 o) { return LOAD_TEXTURE2D_X_LOD(_StpInputMotion, o, 0).xy; }

// Loads the .rgb of _StpInputColor at texel `o` (LOD 0), cast to StpMF3.
StpMF3 StpPatDatColF(StpMU2 o) { return (StpMF3)LOAD_TEXTURE2D_X_LOD(_StpInputColor, o, 0).rgb; }

// Loads the .x of _StpInputDepth at texel `o` (LOD 0).
StpF1 StpPatDatZF(StpMU2 o) { return LOAD_TEXTURE2D_X_LOD(_StpInputDepth, o, 0).x; }

// This provides a place to convert Z from depth to linear if not inlined and actually loaded.
// Computes 1.0 / (STP_ZBUFFER_PARAMS_Z * z + STP_ZBUFFER_PARAMS_W) for the depth value `z`.
StpF1 StpPatFixZF(StpF1 z)
{
    return 1.0 / (STP_ZBUFFER_PARAMS_Z * z + STP_ZBUFFER_PARAMS_W);
}

// Per-pixel stencil input: GetStencilValue applied to the .xy of _StpInputStencil at texel `o` when
// ENABLE_STENCIL_RESPONSIVE is defined, otherwise 0.
StpU1 StpPatDatRF(StpMU2 o)
{
#if defined(ENABLE_STENCIL_RESPONSIVE)
    return GetStencilValue(LOAD_TEXTURE2D_X_LOD(_StpInputStencil, o, 0).xy);
#else
    return StpU1_(0);
#endif
}

// Returns 1.0 when DecodeHasValidHistory reports valid history and, with ENABLE_STENCIL_RESPONSIVE
// defined, `v` shares no bits with DecodeStencilMask; returns 0.0 otherwise.
// NOTE(review): how the caller interprets 0.0 vs 1.0 is not visible in this file.
StpMF1 StpPatFixRF(StpU1 v)
{
    // Start from the history-valid flag carried by the common constant...
    bool keepHistory = DecodeHasValidHistory(STP_COMMON_CONSTANT);

#if defined(ENABLE_STENCIL_RESPONSIVE)
    // ...and clear it for pixels whose stencil value intersects the configured mask.
    bool stencilClear = (v & DecodeStencilMask(STP_COMMON_CONSTANT)) == 0;
    keepHistory = keepHistory && stencilClear;
#endif

    return keepHistory ? StpMF1_(1.0) : StpMF1_(0.0);
}

// Forwards texel position `o` to StpDitF1 and casts the result to StpMF1.
StpMF1 StpPatDitF(StpMU2 o)
{
    return (StpMF1)StpDitF1(o);
}

// Samples _StpPriorFeedback at `p` (LOD 0, s_linear_clamp_sampler), cast to StpMF4.
StpMF4 StpPatPriFedF(StpF2 p)
{
    return (StpMF4)SAMPLE_TEXTURE2D_X_LOD(_StpPriorFeedback, s_linear_clamp_sampler, p, 0);
}

// Red channel gather of _StpPriorFeedback at `p` (s_point_clamp_sampler), cast to StpMF4.
StpMF4 StpPatPriFedR4F(StpF2 p)
{
    return (StpMF4)GATHER_RED_TEXTURE2D_X(_StpPriorFeedback, s_point_clamp_sampler, p);
}

// Green channel gather of _StpPriorFeedback at `p` (s_point_clamp_sampler), cast to StpMF4.
StpMF4 StpPatPriFedG4F(StpF2 p)
{
    return (StpMF4)GATHER_GREEN_TEXTURE2D_X(_StpPriorFeedback, s_point_clamp_sampler, p);
}

// Blue channel gather of _StpPriorFeedback at `p` (s_point_clamp_sampler), cast to StpMF4.
StpMF4 StpPatPriFedB4F(StpF2 p)
{
    return (StpMF4)GATHER_BLUE_TEXTURE2D_X(_StpPriorFeedback, s_point_clamp_sampler, p);
}

// Samples _StpPriorLuma at `p` (LOD 0, s_linear_clamp_sampler), cast to StpMF2.
StpMF2 StpPatPriLumF(StpF2 p)
{
    return (StpMF2)SAMPLE_TEXTURE2D_X_LOD(_StpPriorLuma, s_linear_clamp_sampler, p, 0);
}

// Red channel gather of _StpPriorDepthMotion at `p` (s_point_clamp_sampler).
StpU4 StpPatPriMot4F(StpF2 p)
{
    return GATHER_RED_TEXTURE2D_X(_StpPriorDepthMotion, s_point_clamp_sampler, p);
}

// Writes `v` to _StpDepthMotion at texel `p`.
void StpPatStMotF(StpMU2 p, StpU1 v)
{
    _StpDepthMotion[COORD_TEXTURE2D_X(p)] = v;
}

// Writes `v` to _StpIntermediateColor at texel `p`.
void StpPatStColF(StpMU2 p, StpMF4 v)
{
    _StpIntermediateColor[COORD_TEXTURE2D_X(p)] = v;
}

// Writes `v` to _StpLuma at texel `p`.
void StpPatStLumF(StpMU2 p, StpMF2 v)
{
    _StpLuma[COORD_TEXTURE2D_X(p)] = v;
}

// Writes `v` to _StpIntermediateConvergence at `p` shifted right by 2 (p / 4 on each axis).
void StpPatStCnvF(StpMU2 p, StpMF1 v)
{
    _StpIntermediateConvergence[COORD_TEXTURE2D_X(p >> StpMU1(2))] = v;
}
#endif

// Threading.hlsl is included with its block size set to the STP group size.
#define THREADING_BLOCK_SIZE STP_GROUP_SIZE
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Threading.hlsl"
// Kernel entry point, launched with STP_GROUP_SIZE threads per group along X.
//
// Each thread derives its lane index from the flattened group index and its position from
// ComputeGroupPos plus the StpRemapLaneTo8x16* lane remap, then runs the precision-specific pattern
// routine (StpPatH when STP_16BIT is defined, StpPatF otherwise) with the six global setup constants
// and the eight per-view setup constants of the current slice, all reinterpreted as uint.
[numthreads(STP_GROUP_SIZE, 1, 1)]
void StpSetup(Threading::Group group)
{
    // The Z component of the group ID is handed to the XR view-index macro.
    UNITY_XR_ASSIGN_VIEW_INDEX(group.groupID.z);

    // Choose index types, lane remap and pattern routine for the active precision in one place.
#if defined(STP_16BIT)
    StpW1 laneIndex = StpW1_(group.groupIndex);
    StpW2 tileOrigin = ComputeGroupPos(StpW2(group.groupID.xy));
    StpW2 pixelPos = tileOrigin + StpRemapLaneTo8x16H(laneIndex);
    #define STP_SETUP_RUN_PAT StpPatH
#else
    StpMU1 laneIndex = StpMU1_(group.groupIndex);
    StpMU2 tileOrigin = ComputeGroupPos(StpMU2(group.groupID.xy));
    StpMU2 pixelPos = tileOrigin + StpRemapLaneTo8x16F(laneIndex);
    #define STP_SETUP_RUN_PAT StpPatF
#endif

    // Per-view constant `n` of the current slice, reinterpreted as uint.
    #define STP_SETUP_VIEW_CONSTANT(n) asuint(_StpSetupPerViewConstants[STP_SETUP_PER_VIEW_CONSTANTS_STEREO_OFFSET + n])

    STP_SETUP_RUN_PAT(
        laneIndex,
        pixelPos,
        asuint(_StpSetupConstants0),
        asuint(_StpSetupConstants1),
        asuint(_StpSetupConstants2),
        asuint(_StpSetupConstants3),
        asuint(_StpSetupConstants4),
        asuint(_StpSetupConstants5),
        STP_SETUP_VIEW_CONSTANT(0),
        STP_SETUP_VIEW_CONSTANT(1),
        STP_SETUP_VIEW_CONSTANT(2),
        STP_SETUP_VIEW_CONSTANT(3),
        STP_SETUP_VIEW_CONSTANT(4),
        STP_SETUP_VIEW_CONSTANT(5),
        STP_SETUP_VIEW_CONSTANT(6),
        STP_SETUP_VIEW_CONSTANT(7)
    );

    // Keep the helper macros local to this kernel.
    #undef STP_SETUP_VIEW_CONSTANT
    #undef STP_SETUP_RUN_PAT
}