#version 440

//--------------------------------------------------------------------------------------
// HDAO ambient-occlusion pass.
//
// Writes the occlusion term to the red channel and a two-channel packed depth key to the
// green/blue channels (see packKey) of myOut.
//--------------------------------------------------------------------------------------

// HLSL-style type aliases.
#define int2 ivec2
#define int3 ivec3
#define int4 ivec4
#define half float
#define half2 vec2
#define half3 vec3
#define half4 vec4
#define float2 vec2
#define float3 vec3
#define float4 vec4
#define float2x2 mat2
#define float3x3 mat3
#define float3x4 mat3x4
#define float4x4 mat4
#define saturate(value) clamp((value), 0.0f, 1.0f)
#define rsqrt(value) inversesqrt(value)

#define RING_1 (1)
#define RING_2 (2)
#define RING_3 (3)
#define RING_4 (4)
#define NUM_RING_1_GATHERS 2
#define NUM_RING_2_GATHERS 6
#define NUM_RING_3_GATHERS 12
#define NUM_RING_4_GATHERS 20

// Ring sample pattern: texel offsets of each gather, grouped by ring.
const float2 g_f2HDAORingPattern[NUM_RING_4_GATHERS] =
{
    // Ring 1
    { 1, -1 },
    { 0, 1 },

    // Ring 2
    { 0, 3 },
    { 2, 1 },
    { 3, -1 },
    { 1, -3 },

    // Ring 3
    { 1, -5 },
    { 3, -3 },
    { 5, -1 },
    { 4, 1 },
    { 2, 3 },
    { 0, 5 },

    // Ring 4
    { 0, 7 },
    { 2, 5 },
    { 4, 3 },
    { 6, 1 },
    { 7, -1 },
    { 5, -3 },
    { 3, -5 },
    { 1, -7 },
};

// Ring weights: one weight per gathered sample (x, y, z, w) for each gather location.
const float4 g_f4HDAORingWeight[NUM_RING_4_GATHERS] =
{
    // Ring 1 (Sum = 5.30864)
    { 1.00000, 0.50000, 0.44721, 0.70711 },
    { 0.50000, 0.44721, 0.70711, 1.00000 },

    // Ring 2 (Sum = 6.08746)
    { 0.30000, 0.29104, 0.37947, 0.40000 },
    { 0.42426, 0.33282, 0.37947, 0.53666 },
    { 0.40000, 0.30000, 0.29104, 0.37947 },
    { 0.53666, 0.42426, 0.33282, 0.37947 },

    // Ring 3 (Sum = 6.53067)
    { 0.31530, 0.29069, 0.24140, 0.25495 },
    { 0.36056, 0.29069, 0.26000, 0.30641 },
    { 0.26000, 0.21667, 0.21372, 0.25495 },
    { 0.29069, 0.24140, 0.25495, 0.31530 },
    { 0.29069, 0.26000, 0.30641, 0.36056 },
    { 0.21667, 0.21372, 0.25495, 0.26000 },

    // Ring 4 (Sum = 7.00962)
    { 0.17500, 0.17365, 0.19799, 0.20000 },
    { 0.22136, 0.20870, 0.24010, 0.25997 },
    { 0.24749, 0.21864, 0.24010, 0.28000 },
    { 0.22136, 0.19230, 0.19799, 0.23016 },
    { 0.20000, 0.17500, 0.17365, 0.19799 },
    { 0.25997, 0.22136, 0.20870, 0.24010 },
    { 0.28000, 0.24749, 0.21864, 0.24010 },
    { 0.23016, 0.22136, 0.19230, 0.19799 },
};

// Cumulative ring weight sums: entry i is the sum of the per-ring sums of rings 1..i+1.
const float g_fRingWeightsTotal[RING_4] =
{
    5.30864,
    11.39610,
    17.92677,
    24.93639,
};

#define NUM_NORMAL_LOADS 4

// Texel offsets used by GeometryRejectionTest; each one is paired with a mirrored offset.
const int2 g_i2NormalLoadPattern[NUM_NORMAL_LOADS] =
{
    { 1, 8 },
    { 8, -1 },
    { 5, 4 },
    { 4, -4 },
};

//--------------------------------------------------------------------------------------
// Helper functions to gather Z values
//--------------------------------------------------------------------------------------

/** Converts one depth-buffer value to eye depth: 1 / (d * zConstants.x + zConstants.y). */
float LinearEyeDepth(float d, float2 zConstants)
{
    float z1 = zConstants.x;
    float z2 = zConstants.y;
    return 1.0f / (d * z1 + z2);
}

/** Four-wide overload of LinearEyeDepth; applies the same formula per component. */
float4 LinearEyeDepth(float4 d, float2 zConstants)
{
    float4 z1 = zConstants.xxxx;
    float4 z2 = zConstants.yyyy;
    return 1.0f / (d * z1 + z2);
}

/**
 * Fetches a 2x2 footprint of depth values at texel offsets (0,1), (1,1), (1,0), (0,0)
 * (stored in x, y, z, w respectively) and converts them to eye depth.
 */
float4 GatherZSamples(sampler2D depthTex, float2 f2TexCoord, float2 zConstants)
{
    float4 f4Gather;
    f4Gather.x = textureLodOffset(depthTex, f2TexCoord, 0, int2(0, 1)).x;
    f4Gather.y = textureLodOffset(depthTex, f2TexCoord, 0, int2(1, 1)).x;
    f4Gather.z = textureLodOffset(depthTex, f2TexCoord, 0, int2(1, 0)).x;
    f4Gather.w = textureLodOffset(depthTex, f2TexCoord, 0, int2(0, 0)).x;

    return LinearEyeDepth(f4Gather, zConstants);
}

/**
 * Un-projects a point with mInvProj.
 * projZ is remapped with (projZ * 2 - 1) before the multiply; the result is divided by w.
 */
float3 ReconstructCSPosition(float2 posInNDC, float projZ, float4x4 mInvProj)
{
    float4 pos = float4(posInNDC, projZ * 2 - 1, 1);
    // Column-vector convention (the HLSL equivalent would be mul(pos, mInvProj)).
    float4 posInCS = mInvProj * pos;
    return posInCS.xyz / posInCS.w;
}

/** Samples depthTex at uv (LOD 0) and reconstructs the position with mInvProj. */
float3 GetPosition(sampler2D depthTex, float2 uv, float4x4 mInvProj)
{
    float projZ = textureLod(depthTex, uv, 0).x;

    // Assume DirectX style UV,
    // remap x: [0,1]->[-1,1]  y: [0,1]->[1,-1]
    float2 ndcPos = (uv - 0.5f) * float2(2, -2);
    return ReconstructCSPosition(ndcPos, projZ, mInvProj);
}

/** Same reconstruction as GetPosition, but from an already fetched depth value. */
float3 GetCameraXYZFromDepth(float depth, float2 UV, float4x4 invProj)
{
    float4 ndcPos = float4(UV.x * 2.0f - 1.0f, (1.0f - UV.y) * 2.0f - 1.0f, depth, 1);
    return ReconstructCSPosition(ndcPos.xy, ndcPos.z, invProj);
}

/**
 * For each entry of g_i2NormalLoadPattern, reconstructs the positions at the offset texel
 * and at its mirrored texel, and takes the dot product of the two normalized directions
 * from the centre position to them.
 *
 * Returns the sum over the NUM_NORMAL_LOADS pairs of (dot + 2), scaled by 0.125.
 * With four pairs and each dot in [-1, 1] that is a value in [0.5, 1.5].
 */
float GeometryRejectionTest(int2 i2ScreenCoord, float2 rtSize, float4x4 invProj, sampler2D a_depthTex)
{
    // Valid texel range is [0, rtSize - 1]; loop-invariant, so computed once.
    const int2 i2Zero = int2(0, 0);
    int2 rtSizeMinusOne = ivec2(rtSize.xy - float2(1.0f, 1.0f));

    float fDepth = texelFetch(a_depthTex, i2ScreenCoord, 0).x;
    float3 f3CenterPos = GetCameraXYZFromDepth(fDepth, i2ScreenCoord / rtSize.xy, invProj);

    float fSummedDot = 0.0f;

    for (int iNormal = 0; iNormal < NUM_NORMAL_LOADS; iNormal++)
    {
        int2 i2MirrorPattern = (g_i2NormalLoadPattern[iNormal] + int2(1, 1)) * int2(-1, -1);
        int2 i2OffsetScreenCoord = i2ScreenCoord + g_i2NormalLoadPattern[iNormal];
        int2 i2MirrorOffsetScreenCoord = i2ScreenCoord + i2MirrorPattern;

        // Clamp our test to screen coordinates: upper bound first, then lower bound.
        i2OffsetScreenCoord = max(min(i2OffsetScreenCoord, rtSizeMinusOne), i2Zero);
        i2MirrorOffsetScreenCoord = max(min(i2MirrorOffsetScreenCoord, rtSizeMinusOne), i2Zero);

        fDepth = texelFetch(a_depthTex, i2OffsetScreenCoord, 0).x;
        float3 f3OffsetPos = GetCameraXYZFromDepth(fDepth, i2OffsetScreenCoord / rtSize.xy, invProj);

        fDepth = texelFetch(a_depthTex, i2MirrorOffsetScreenCoord, 0).x;
        float3 f3MirrorPos = GetCameraXYZFromDepth(fDepth, i2MirrorOffsetScreenCoord / rtSize.xy, invProj);

        float3 f3Dir0 = normalize(f3OffsetPos - f3CenterPos);
        float3 f3Dir1 = normalize(f3MirrorPos - f3CenterPos);

        fSummedDot += (dot(f3Dir0, f3Dir1) + 2.0f);
    }

    return (fSummedDot * 0.125f);
}

//////////////////////////// these functions are used for generating ao.gb for ssaoblur ////////////////////////////

/** Used for packing Z into the GB channels: z / a_fZFar clamped to [0, 1]. */
float CSZToKey(float z, float a_fZFar)
{
    return saturate(z * (1.0 / a_fZFar));
}

/**
 * Used for packing Z into the GB channels.
 * x receives key truncated (floor) to a multiple of 1/256; y receives the remainder
 * scaled by 256, i.e. key * 256 - floor(key * 256).
 */
float2 packKey(float key)
{
    float2 outp;

    // Truncate down to a multiple of 1/256.0
    float temp = floor(key * 256.0);

    // Integer part
    outp.x = temp * (1.0 / 256.0);

    // Fractional part
    outp.y = key * 256.0 - temp;

    return outp;
}

/** Combines the two packed channels: x * 256/257 + y * 1/257. */
float UnpackKey(float2 fDepthGb)
{
    return fDepthGb.x * (256.0 / 257.0) + fDepthGb.y * (1.0 / 257.0);
}

// Constant buffer. The 17 leading vec4 members are placeholders that precede the
// parameters this shader reads.
layout(binding = 12) uniform HardcodeConstantBuffer
{
    vec4 dummy0;
    vec4 dummy1;
    vec4 dummy2;
    vec4 dummy3;
    vec4 dummy4;
    vec4 dummy5;
    vec4 dummy6;
    vec4 dummy7;
    vec4 dummy8;
    vec4 dummy9;
    vec4 dummy10;
    vec4 dummy11;
    vec4 dummy12;
    vec4 dummy13;
    vec4 dummy14;
    vec4 dummy15;
    vec4 dummy16;
    float4x4 g_mInvProj;
    float4x4 g_mInvTransposeWorldToCamera;
    vec4 g_RTSize;
    vec4 g_HDAO_ZConstants;
    vec4 g_Falloff;
    vec4 g_MiscParam0;
    vec4 g_MiscParam1;
};

#define g_FadeoutScale  g_MiscParam0.x   // Param : FadeoutScale
#define g_RejectRadius  g_MiscParam0.y   // Param : RejectRadius
#define g_AcceptRadius  g_MiscParam0.z   // Param : AcceptRadius
#define g_AcceptAngle   g_MiscParam0.w   // Param : AcceptAngle
#define g_HDAOIntensity g_MiscParam1.x   // Param : Intensity
#define g_KernelScale   g_MiscParam1.y   // Param : KernelScale
#define g_CameraFar     g_MiscParam1.z   // Param : CameraFar

#define g_UseNormal false
#define g_HDAO_NormalScale 1.0f

layout(binding = 0) uniform sampler2D g_depthTex;
// No normal texture is declared; g_UseNormal is false and only depth is sampled.

struct PS_Input
{
    float4 Position;
    float2 uv;
};

in PS_Input IN;
out vec4 myOut;

void main()
{
    // Flip the incoming V coordinate.
    float2 rYuv = float2(IN.uv.x, 1 - IN.uv.y);

    // Compute integer screen coord, and store off the inverse of the RT Size
    float2 f2InvRTSize = 1.0f / g_RTSize.xy;
    float2 f2ScreenCoord = rYuv * g_RTSize.xy;
    int2 i2ScreenCoord = int2(f2ScreenCoord);

    // View space point being shaded
    float3 f3CameraPos = GetPosition(g_depthTex, rYuv.xy, g_mInvProj);

    // Generate .gb for ssaoblur
    float2 f2Ogb = packKey(CSZToKey(f3CameraPos.z, g_CameraFar));

    float fDot = GeometryRejectionTest(i2ScreenCoord, g_RTSize.xy, g_mInvProj, g_depthTex);

    if (fDot > 0.5f)
    {
        // Sample the center pixel for camera Z
        float2 f2TexCoord = float2(f2ScreenCoord * f2InvRTSize);
        float fDepth = textureLod(g_depthTex, f2TexCoord, 0).x;
        float fCenterZ = LinearEyeDepth(fDepth, g_HDAO_ZConstants.xy);

        float2 f2KernelScale = float2(g_KernelScale, g_KernelScale);
        float4 f4Occlusion = float4(0.0f);

        // Loop through each gather location, and compare with its mirrored location
        for (int iGather = 0; iGather < NUM_RING_4_GATHERS; iGather++)
        {
            float4 f4Diff = float4(0.0f);
            float4 f4SampledZ[2];
            float4 f4Compare[2];

            float2 f2MirrorScreenCoord = ((f2KernelScale * g_f2HDAORingPattern[iGather]) + float2(1.0f, 1.0f)) * float2(-1.0f, -1.0f);

            f2TexCoord = float2((f2ScreenCoord + (f2KernelScale * g_f2HDAORingPattern[iGather])) * f2InvRTSize);
            float2 f2MirrorTexCoord = float2((f2ScreenCoord + (f2MirrorScreenCoord)) * f2InvRTSize);

            // Sample
            f4SampledZ[0] = GatherZSamples(g_depthTex, f2TexCoord, g_HDAO_ZConstants.xy);
            f4SampledZ[1] = GatherZSamples(g_depthTex, f2MirrorTexCoord, g_HDAO_ZConstants.xy);

            // Detect valleys: a sample counts (1.0) when its depth difference to the
            // centre lies strictly between g_AcceptRadius and g_RejectRadius, else 0.0.
            f4Diff = fCenterZ.xxxx - f4SampledZ[0];
            f4Compare[0] = vec4(lessThan(f4Diff, g_RejectRadius.xxxx));
            f4Compare[0] *= vec4(greaterThan(f4Diff, g_AcceptRadius.xxxx));

            f4Diff = fCenterZ.xxxx - f4SampledZ[1];
            f4Compare[1] = vec4(lessThan(f4Diff, g_RejectRadius.xxxx));
            f4Compare[1] *= vec4(greaterThan(f4Diff, g_AcceptRadius.xxxx));

            // A sample only contributes when its mirrored counterpart (zwxy) passes too.
            f4Occlusion.xyzw += (g_f4HDAORingWeight[iGather].xyzw * (f4Compare[0].xyzw * f4Compare[1].zwxy) * fDot);
        }

        // Finally calculate the HDAO occlusion value
        float fOcclusion = ((f4Occlusion.x + f4Occlusion.y + f4Occlusion.z + f4Occlusion.w) / (2.0f * g_fRingWeightsTotal[RING_4 - 1]));

        // Linear fade: 1 at fCenterZ <= g_Falloff.x, 0 at fCenterZ >= g_Falloff.x + g_Falloff.y.
        float falloff = 1 - saturate((fCenterZ - g_Falloff.x) / (g_Falloff.y));
        fOcclusion *= g_HDAOIntensity * falloff;

        fOcclusion = 1.0f - saturate(fOcclusion);

        myOut = float4(fOcclusion, f2Ogb, 1.0f);
    }
    else
    {
        // Rejected by the geometry test: output no occlusion, keep the depth key.
        myOut = float4(1.0f, f2Ogb, 1.0f);
    }
}
#version 440

//--------------------------------------------------------------------------------------
// Bilateral blur of the SSAO buffer along one axis.
//
// g_mainTex holds the AO value in .r and a packed depth key in .gb. The blurred AO is
// written to .r (and .a) of myOut; the centre texel's .gb is passed through unchanged.
//--------------------------------------------------------------------------------------

// HLSL-style type aliases.
#define int2 ivec2
#define int3 ivec3
#define int4 ivec4
#define half float
#define half2 vec2
#define half3 vec3
#define half4 vec4
#define float2 vec2
#define float3 vec3
#define float4 vec4
#define float2x2 mat2
#define float3x3 mat3
#define float3x4 mat3x4
#define float4x4 mat4
#define saturate(value) clamp((value), 0.0f, 1.0f)
#define rsqrt(value) inversesqrt(value)

//--------------------------------------------------------------------------------------
// The z value is packed into aoTex.gb (integer and fraction parts); unpacking adds the
// two channels back together as x * 256/257 + y * 1/257.
//--------------------------------------------------------------------------------------
float UnpackKey(float2 fDepthGb)
{
    return fDepthGb.x * (256.0 / 257.0) + fDepthGb.y * (1.0 / 257.0);
}

// Constant buffer. The 17 leading vec4 members are placeholders that precede the
// parameters this shader reads.
layout(binding = 12) uniform HardcodeConstantBuffer
{
    vec4 dummy0;
    vec4 dummy1;
    vec4 dummy2;
    vec4 dummy3;
    vec4 dummy4;
    vec4 dummy5;
    vec4 dummy6;
    vec4 dummy7;
    vec4 dummy8;
    vec4 dummy9;
    vec4 dummy10;
    vec4 dummy11;
    vec4 dummy12;
    vec4 dummy13;
    vec4 dummy14;
    vec4 dummy15;
    vec4 dummy16;
    vec4 v4_g_ssao_BlurFilterDistance;
    vec4 v4_g_mainTex_TexelSize;
    vec4 v4_g_BlurAxis;
    vec4 v4_g_EdgeSharpness;
};

#define g_ssao_BlurFilterDistance v4_g_ssao_BlurFilterDistance.x
#define g_mainTex_TexelSize       v4_g_mainTex_TexelSize.xy
#define g_BlurAxis                v4_g_BlurAxis.xy
#define g_EdgeSharpness           v4_g_EdgeSharpness.x

struct PS_Input
{
    float4 Position;
    float2 uv;
};

// Spatial tap weights, indexed by |tap distance| (0..4).
const float gaussian[5] = { 0.153170, 0.144893, 0.122649, 0.092902, 0.062970 }; // stddev = 2.0

layout(binding = 0) uniform sampler2D g_mainTex;

//--------------------------------------------------------------------------------------
// Blur SSAO
//--------------------------------------------------------------------------------------
in PS_Input IN;
out vec4 myOut;

void main()
{
    // Taps are taken at -kBlurRadius..+kBlurRadius (excluding 0) along g_BlurAxis.
    const int kBlurRadius = 4;

    // Reverse the y axis of the incoming UV.
    float2 ssC = float2(IN.uv.x, 1 - IN.uv.y);

    // Centre texel: AO in .r, packed depth key in .gb.
    float4 temp = textureLod(g_mainTex, ssC, 0);
    float2 passthrough2 = temp.gb;
    float key = UnpackKey(passthrough2);

    // Base weight for depth falloff. Increase this for more blurriness,
    // decrease it for better edge discrimination.
    float BASE = gaussian[0] * 0.5;
    float totalWeight = BASE;
    float sum = temp.r * totalWeight;

    // Accumulate the side taps in order -4, -3, -2, -1, 1, 2, 3, 4.
    for (int r = -kBlurRadius; r <= kBlurRadius; ++r)
    {
        // The centre tap is already accounted for by BASE above.
        if (r == 0)
        {
            continue;
        }

        temp = textureLod(g_mainTex, ssC + g_BlurAxis * g_mainTex_TexelSize.xy * (float(r) * g_ssao_BlurFilterDistance), 0);
        float tapKey = UnpackKey(temp.gb);
        float value = temp.r;

        // Spatial domain: offset gaussian tap
        float weight = 0.3 + gaussian[abs(r)];

        // Range domain (the "bilateral" weight). As the depth difference increases, decrease
        // the weight. The two keys are the depth values stored in the AO texture's .gb.
        weight *= max(0.0, 1.0 - (2000.0 * g_EdgeSharpness) * abs(tapKey - key));

        sum += value * weight;
        totalWeight += weight;
    }

    // epsilon keeps the denominator away from zero.
    const float epsilon = 0.0001;
    float4 fragment = vec4(sum / (totalWeight + epsilon));

    // Keep the centre texel's packed depth key untouched for later passes.
    fragment.gb = passthrough2;
    myOut = fragment;
}