178 changed files with 15072 additions and 38 deletions
@ -0,0 +1,8 @@ |
|||
#!/bin/sh

# Stages the Hlms templates and the common materials shipped with the
# ogre-next samples under ./data. Every step aborts the script with the
# failing command's exit status.
OGRE_MEDIA=../Dependencies/ogre-next/Samples/Media

for target_dir in data/Hlms data/CommonMaterials; do
	mkdir -p "$target_dir" || exit $?
done

# Same order as before: Common, Pbs, Unlit
for hlms_dir in Common Pbs Unlit; do
	rsync -av "$OGRE_MEDIA/Hlms/$hlms_dir" data/Hlms/ || exit $?
done

rsync -av "$OGRE_MEDIA/2.0/scripts/materials/Common" data/CommonMaterials || exit $?
@ -1,3 +1,4 @@ |
|||
SubDir . ; |
|||
|
|||
# TODO: This should depend on libGamelib such that Jam knows it needs to re-link if Gamelib updates |
|||
Main ogreApp : OgreApp.cake.cpp ; |
@ -0,0 +1,47 @@ |
|||
|
|||
//#include "SyntaxHighlightingMisc.h" |
|||
|
|||
#ifdef HEADER |
|||
/**
	Intersects a ray with the axis-aligned box centred at the origin whose half
	extents are probeShapeHalfSize.

	@param reflDirLS            Ray direction.
	@param posLS                Ray origin.
	@param probeShapeHalfSize   Half size of the box on each axis.
	@return
		The value t such that posLS + reflDirLS * t = intersectionPos.
*/
INLINE float findIntersectionDistance( float3 reflDirLS, float3 posLS, float3 probeShapeHalfSize )
{
	//Reciprocal of the direction, shared by both plane computations
	float3 rcpDir = float3( 1.0, 1.0, 1.0 ) / reflDirLS;

	//Ray parameter at which each axis reaches its -halfSize / +halfSize plane
	float3 tAtMinPlane = ( -probeShapeHalfSize - posLS ) * rcpDir;
	float3 tAtMaxPlane = ( probeShapeHalfSize - posLS ) * rcpDir;

	//Per axis keep the larger of the two (negative values are of no interest)
	float3 tLargest = max( tAtMaxPlane.xyz, tAtMinPlane.xyz );

	//The closest of the three per-axis solutions is the result
	return min( min( tLargest.x, tLargest.y ), tLargest.z );
}
|||
|
|||
#else |
|||
|
|||
float fDepth = OGRE_Sample( depthTexture, pointSampler, inPs.uv0 ).x; |
|||
|
|||
float linearDepth = p_projectionParams.y / (fDepth - p_projectionParams.x); |
|||
|
|||
float3 viewSpacePosition = inPs.cameraDir * linearDepth; |
|||
|
|||
float fDist = length( viewSpacePosition.xyz ); |
|||
float3 probeToPosDir = viewSpacePosition / fDist; |
|||
|
|||
probeToPosDir = mul( p_viewSpaceToProbeLocalSpace, probeToPosDir ); |
|||
|
|||
float fApproxDist = findIntersectionDistance( probeToPosDir, p_cameraPosLS, p_probeShapeHalfSize ); |
|||
|
|||
//We can't store fDist directly because we have limited precision (often 8 bits) |
|||
//Thus we store it in terms of |
|||
// fApproxDist * alpha = fDist; |
|||
//During render we'll know fApproxDist and alpha, but want to know fDist |
|||
//We also know alpha >= 0 |
|||
//For precision issues and because it's good enough, we force alpha <= 2.0 and store alpha / 2 so the value fits in [0; 1] |
|||
float alpha = fDist / fApproxDist; |
|||
alpha *= 0.5; |
|||
alpha = min( alpha, 1.0 ); |
|||
|
|||
#endif |
@ -0,0 +1,225 @@ |
|||
fragment_program Ogre/Copy/4xFP32_ps_HLSL hlsl |
|||
{ |
|||
source Copyback_4xFP32_ps.hlsl |
|||
entry_point main |
|||
target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3 |
|||
} |
|||
|
|||
fragment_program Ogre/Copy/4xFP32_ps_GLSL glsl |
|||
{ |
|||
source Copyback_4xFP32_ps.glsl |
|||
default_params { param_named tex int 0 } |
|||
} |
|||
|
|||
fragment_program Ogre/Copy/4xFP32_ps_VK glslvk |
|||
{ |
|||
source Copyback_4xFP32_ps.glsl |
|||
} |
|||
|
|||
fragment_program Ogre/Copy/4xFP32_ps_Metal metal |
|||
{ |
|||
source Copyback_4xFP32_ps.metal |
|||
shader_reflection_pair_hint Ogre/Compositor/Quad_vs |
|||
} |
|||
|
|||
fragment_program Ogre/Copy/4xFP32_ps unified |
|||
{ |
|||
delegate Ogre/Copy/4xFP32_ps_GLSL |
|||
delegate Ogre/Copy/4xFP32_ps_VK |
|||
delegate Ogre/Copy/4xFP32_ps_HLSL |
|||
delegate Ogre/Copy/4xFP32_ps_Metal |
|||
} |
|||
|
|||
material Ogre/Copy/4xFP32 |
|||
{ |
|||
technique |
|||
{ |
|||
pass |
|||
{ |
|||
depth_check off |
|||
depth_write off |
|||
|
|||
cull_hardware none |
|||
|
|||
vertex_program_ref Ogre/Compositor/Quad_vs |
|||
{ |
|||
} |
|||
|
|||
fragment_program_ref Ogre/Copy/4xFP32_ps |
|||
{ |
|||
} |
|||
|
|||
texture_unit |
|||
{ |
|||
filtering none |
|||
tex_address_mode clamp |
|||
} |
|||
} |
|||
} |
|||
} |
|||
|
|||
fragment_program Ogre/Copy/4xFP32_2DArray_ps_HLSL hlsl |
|||
{ |
|||
source Copyback_4xFP32_2DArray_ps.hlsl |
|||
entry_point main |
|||
target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3 |
|||
} |
|||
|
|||
fragment_program Ogre/Copy/4xFP32_2DArray_ps_GLSL glsl |
|||
{ |
|||
source Copyback_4xFP32_2DArray_ps.glsl |
|||
default_params { param_named tex int 0 } |
|||
} |
|||
|
|||
fragment_program Ogre/Copy/4xFP32_2DArray_ps_VK glslvk |
|||
{ |
|||
source Copyback_4xFP32_2DArray_ps.glsl |
|||
} |
|||
|
|||
fragment_program Ogre/Copy/4xFP32_2DArray_ps_Metal metal |
|||
{ |
|||
source Copyback_4xFP32_2DArray_ps.metal |
|||
shader_reflection_pair_hint Ogre/Compositor/Quad_vs |
|||
} |
|||
|
|||
fragment_program Ogre/Copy/4xFP32_2DArray_ps unified |
|||
{ |
|||
delegate Ogre/Copy/4xFP32_2DArray_ps_GLSL |
|||
delegate Ogre/Copy/4xFP32_2DArray_ps_VK |
|||
delegate Ogre/Copy/4xFP32_2DArray_ps_HLSL |
|||
delegate Ogre/Copy/4xFP32_2DArray_ps_Metal |
|||
} |
|||
|
|||
material Ogre/Copy/4xFP32_2DArray |
|||
{ |
|||
technique |
|||
{ |
|||
pass |
|||
{ |
|||
depth_check off |
|||
depth_write off |
|||
|
|||
cull_hardware none |
|||
|
|||
vertex_program_ref Ogre/Compositor/Quad_vs |
|||
{ |
|||
} |
|||
|
|||
fragment_program_ref Ogre/Copy/4xFP32_2DArray_ps |
|||
{ |
|||
param_named sliceIdx float 0 |
|||
} |
|||
|
|||
texture_unit |
|||
{ |
|||
filtering none |
|||
tex_address_mode clamp |
|||
} |
|||
} |
|||
} |
|||
} |
|||
|
|||
fragment_program Ogre/Copy/1xFP32_ps_HLSL hlsl |
|||
{ |
|||
source Copyback_1xFP32_ps.hlsl |
|||
entry_point main |
|||
target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3 |
|||
} |
|||
|
|||
fragment_program Ogre/Copy/1xFP32_ps_GLSL glsl |
|||
{ |
|||
source Copyback_1xFP32_ps.glsl |
|||
default_params { param_named tex int 0 } |
|||
} |
|||
|
|||
fragment_program Ogre/Copy/1xFP32_ps_VK glslvk |
|||
{ |
|||
source Copyback_1xFP32_ps.glsl |
|||
} |
|||
|
|||
fragment_program Ogre/Copy/1xFP32_ps_Metal metal |
|||
{ |
|||
source Copyback_1xFP32_ps.metal |
|||
shader_reflection_pair_hint Ogre/Compositor/Quad_vs |
|||
} |
|||
|
|||
fragment_program Ogre/Copy/1xFP32_ps unified |
|||
{ |
|||
delegate Ogre/Copy/1xFP32_ps_GLSL |
|||
delegate Ogre/Copy/1xFP32_ps_VK |
|||
delegate Ogre/Copy/1xFP32_ps_HLSL |
|||
delegate Ogre/Copy/1xFP32_ps_Metal |
|||
} |
|||
|
|||
material Ogre/Copy/1xFP32 : Ogre/Copy/4xFP32 |
|||
{ |
|||
technique |
|||
{ |
|||
pass |
|||
{ |
|||
fragment_program_ref Ogre/Copy/1xFP32_ps |
|||
{ |
|||
} |
|||
} |
|||
} |
|||
} |
|||
|
|||
fragment_program Ogre/Resolve/1xFP32_Subsample0_ps_GLSL glsl |
|||
{ |
|||
source Resolve_1xFP32_Subsample0_ps.glsl |
|||
default_params { param_named tex int 0 } |
|||
} |
|||
|
|||
fragment_program Ogre/Resolve/1xFP32_Subsample0_ps_VK glslvk |
|||
{ |
|||
source Resolve_1xFP32_Subsample0_ps.glsl |
|||
} |
|||
|
|||
fragment_program Ogre/Resolve/1xFP32_Subsample0_ps_HLSL hlsl |
|||
{ |
|||
source Resolve_1xFP32_Subsample0_ps.hlsl |
|||
entry_point main |
|||
target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3 |
|||
} |
|||
|
|||
fragment_program Ogre/Resolve/1xFP32_Subsample0_ps_Metal metal |
|||
{ |
|||
source Resolve_1xFP32_Subsample0_ps.metal |
|||
shader_reflection_pair_hint Ogre/Compositor/Quad_vs |
|||
} |
|||
|
|||
fragment_program Ogre/Resolve/1xFP32_Subsample0_ps unified |
|||
{ |
|||
delegate Ogre/Resolve/1xFP32_Subsample0_ps_GLSL |
|||
delegate Ogre/Resolve/1xFP32_Subsample0_ps_VK |
|||
delegate Ogre/Resolve/1xFP32_Subsample0_ps_HLSL |
|||
delegate Ogre/Resolve/1xFP32_Subsample0_ps_Metal |
|||
} |
|||
|
|||
material Ogre/Resolve/1xFP32_Subsample0 |
|||
{ |
|||
technique |
|||
{ |
|||
pass |
|||
{ |
|||
depth_check off |
|||
depth_write off |
|||
|
|||
cull_hardware none |
|||
|
|||
vertex_program_ref Ogre/Compositor/Quad_vs |
|||
{ |
|||
} |
|||
|
|||
fragment_program_ref Ogre/Resolve/1xFP32_Subsample0_ps |
|||
{ |
|||
} |
|||
|
|||
texture_unit |
|||
{ |
|||
filtering none |
|||
tex_address_mode clamp |
|||
} |
|||
} |
|||
} |
|||
} |
@ -0,0 +1,65 @@ |
|||
//DPM stands for Dual Paraboloid Mapping. |
|||
|
|||
fragment_program Ogre/DPM/CubeToDpm_4xFP16_ps_GLSL glsl |
|||
{ |
|||
source CubeToDpm_4xFP16_ps.glsl |
|||
default_params |
|||
{ |
|||
param_named cubeTexture int 0 |
|||
} |
|||
} |
|||
|
|||
fragment_program Ogre/DPM/CubeToDpm_4xFP16_ps_VK glslvk |
|||
{ |
|||
source CubeToDpm_4xFP16_ps.glsl |
|||
} |
|||
|
|||
fragment_program Ogre/DPM/CubeToDpm_4xFP16_ps_HLSL hlsl |
|||
{ |
|||
source CubeToDpm_4xFP16_ps.hlsl |
|||
entry_point main |
|||
target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3 |
|||
} |
|||
|
|||
fragment_program Ogre/DPM/CubeToDpm_4xFP16_ps_Metal metal |
|||
{ |
|||
source CubeToDpm_4xFP16_ps.metal |
|||
shader_reflection_pair_hint Ogre/Compositor/Quad_vs |
|||
} |
|||
|
|||
fragment_program Ogre/DPM/CubeToDpm_4xFP16_ps unified |
|||
{ |
|||
delegate Ogre/DPM/CubeToDpm_4xFP16_ps_GLSL |
|||
delegate Ogre/DPM/CubeToDpm_4xFP16_ps_VK |
|||
delegate Ogre/DPM/CubeToDpm_4xFP16_ps_HLSL |
|||
delegate Ogre/DPM/CubeToDpm_4xFP16_ps_Metal |
|||
} |
|||
|
|||
// Converts a cubemap to DPM in the pixel shader. |
|||
material Ogre/DPM/CubeToDpm |
|||
{ |
|||
technique |
|||
{ |
|||
pass |
|||
{ |
|||
depth_check off |
|||
depth_func always_pass |
|||
|
|||
cull_hardware none |
|||
|
|||
vertex_program_ref Ogre/Compositor/Quad_vs |
|||
{ |
|||
} |
|||
|
|||
fragment_program_ref Ogre/DPM/CubeToDpm_4xFP16_ps |
|||
{ |
|||
} |
|||
|
|||
texture_unit depthTexture |
|||
{ |
|||
filtering bilinear |
|||
tex_address_mode clamp |
|||
} |
|||
} |
|||
} |
|||
} |
@ -0,0 +1,134 @@ |
|||
//DPSM stands for Dual Paraboloid Shadow Mapping. |
|||
|
|||
fragment_program Ogre/DPSM/CubeToDpsm_ps_GLSL glsl |
|||
{ |
|||
source CubeToDpsm_ps.glsl |
|||
default_params |
|||
{ |
|||
param_named depthTexture int 0 |
|||
} |
|||
} |
|||
fragment_program Ogre/DPSM/CubeToDpsm_Colour_ps_GLSL glsl : Ogre/DPSM/CubeToDpsm_ps_GLSL |
|||
{ |
|||
preprocessor_defines OUTPUT_TO_COLOUR=1 |
|||
} |
|||
|
|||
fragment_program Ogre/DPSM/CubeToDpsm_ps_VK glslvk |
|||
{ |
|||
source CubeToDpsm_ps.glsl |
|||
} |
|||
fragment_program Ogre/DPSM/CubeToDpsm_Colour_ps_VK glslvk : Ogre/DPSM/CubeToDpsm_ps_VK |
|||
{ |
|||
preprocessor_defines OUTPUT_TO_COLOUR=1 |
|||
} |
|||
|
|||
fragment_program Ogre/DPSM/CubeToDpsm_ps_GLSLES glsles |
|||
{ |
|||
source CubeToDpsm_ps.glsles |
|||
default_params |
|||
{ |
|||
param_named depthTexture int 0 |
|||
} |
|||
} |
|||
fragment_program Ogre/DPSM/CubeToDpsm_Colour_ps_GLSLES glsles : Ogre/DPSM/CubeToDpsm_ps_GLSLES |
|||
{ |
|||
preprocessor_defines OUTPUT_TO_COLOUR=1 |
|||
} |
|||
|
|||
fragment_program Ogre/DPSM/CubeToDpsm_ps_HLSL hlsl |
|||
{ |
|||
source CubeToDpsm_ps.hlsl |
|||
entry_point main |
|||
target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3 |
|||
} |
|||
fragment_program Ogre/DPSM/CubeToDpsm_Colour_ps_HLSL hlsl : Ogre/DPSM/CubeToDpsm_ps_HLSL |
|||
{ |
|||
preprocessor_defines OUTPUT_TO_COLOUR=1 |
|||
} |
|||
|
|||
fragment_program Ogre/DPSM/CubeToDpsm_ps_Metal metal |
|||
{ |
|||
source CubeToDpsm_ps.metal |
|||
shader_reflection_pair_hint Ogre/Compositor/Quad_vs |
|||
} |
|||
fragment_program Ogre/DPSM/CubeToDpsm_Colour_ps_Metal metal : Ogre/DPSM/CubeToDpsm_ps_Metal |
|||
{ |
|||
preprocessor_defines OUTPUT_TO_COLOUR=1 |
|||
} |
|||
|
|||
fragment_program Ogre/DPSM/CubeToDpsm_ps unified |
|||
{ |
|||
delegate Ogre/DPSM/CubeToDpsm_ps_GLSL |
|||
delegate Ogre/DPSM/CubeToDpsm_ps_GLSLES |
|||
delegate Ogre/DPSM/CubeToDpsm_ps_VK |
|||
delegate Ogre/DPSM/CubeToDpsm_ps_HLSL |
|||
delegate Ogre/DPSM/CubeToDpsm_ps_Metal |
|||
} |
|||
fragment_program Ogre/DPSM/CubeToDpsm_Colour_ps unified |
|||
{ |
|||
delegate Ogre/DPSM/CubeToDpsm_Colour_ps_GLSL |
|||
delegate Ogre/DPSM/CubeToDpsm_Colour_ps_GLSLES |
|||
delegate Ogre/DPSM/CubeToDpsm_Colour_ps_VK |
|||
delegate Ogre/DPSM/CubeToDpsm_Colour_ps_HLSL |
|||
delegate Ogre/DPSM/CubeToDpsm_Colour_ps_Metal |
|||
} |
|||
|
|||
// Converts a cubemap to DPSM in the pixel shader. |
|||
material Ogre/DPSM/CubeToDpsm |
|||
{ |
|||
technique |
|||
{ |
|||
pass |
|||
{ |
|||
//Depth writes must be on, since we write directly to the depth buffer. |
|||
depth_check on |
|||
depth_write on |
|||
|
|||
depth_func always_pass |
|||
|
|||
cull_hardware none |
|||
|
|||
vertex_program_ref Ogre/Compositor/Quad_vs |
|||
{ |
|||
} |
|||
|
|||
fragment_program_ref Ogre/DPSM/CubeToDpsm_ps |
|||
{ |
|||
} |
|||
|
|||
texture_unit depthTexture |
|||
{ |
|||
filtering none |
|||
tex_address_mode clamp |
|||
} |
|||
} |
|||
} |
|||
} |
|||
|
|||
material Ogre/DPSM/CubeToDpsmColour |
|||
{ |
|||
technique |
|||
{ |
|||
pass |
|||
{ |
|||
depth_check off |
|||
depth_write off |
|||
|
|||
cull_hardware none |
|||
|
|||
vertex_program_ref Ogre/Compositor/Quad_vs |
|||
{ |
|||
} |
|||
|
|||
fragment_program_ref Ogre/DPSM/CubeToDpsm_Colour_ps |
|||
{ |
|||
} |
|||
|
|||
texture_unit depthTexture |
|||
{ |
|||
filtering none |
|||
tex_address_mode clamp |
|||
} |
|||
} |
|||
} |
|||
} |
@ -0,0 +1,67 @@ |
|||
|
|||
fragment_program Ogre/Depth/DownscaleMax_ps_GLSL glsl |
|||
{ |
|||
source DepthDownscaleMax_ps.glsl |
|||
default_params |
|||
{ |
|||
param_named depthTexture int 0 |
|||
} |
|||
} |
|||
|
|||
fragment_program Ogre/Depth/DownscaleMax_ps_VK glslvk |
|||
{ |
|||
source DepthDownscaleMax_ps.glsl |
|||
} |
|||
|
|||
fragment_program Ogre/Depth/DownscaleMax_ps_HLSL hlsl |
|||
{ |
|||
source DepthDownscaleMax_ps.hlsl |
|||
entry_point main |
|||
target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3 |
|||
} |
|||
|
|||
fragment_program Ogre/Depth/DownscaleMax_ps_Metal metal |
|||
{ |
|||
source DepthDownscaleMax_ps.metal |
|||
shader_reflection_pair_hint Ogre/Compositor/Quad_vs |
|||
} |
|||
|
|||
fragment_program Ogre/Depth/DownscaleMax_ps unified |
|||
{ |
|||
delegate Ogre/Depth/DownscaleMax_ps_GLSL |
|||
delegate Ogre/Depth/DownscaleMax_ps_VK |
|||
delegate Ogre/Depth/DownscaleMax_ps_HLSL |
|||
delegate Ogre/Depth/DownscaleMax_ps_Metal |
|||
} |
|||
|
|||
// Downscales resolution of input depth texture by half (w/2 x h/2) |
|||
// using a max filter (max depth of all 4 neighbours) |
|||
material Ogre/Depth/DownscaleMax |
|||
{ |
|||
technique |
|||
{ |
|||
pass |
|||
{ |
|||
depth_check on |
|||
depth_write on |
|||
|
|||
depth_func always_pass |
|||
|
|||
cull_hardware none |
|||
|
|||
vertex_program_ref Ogre/Compositor/Quad_vs |
|||
{ |
|||
} |
|||
|
|||
fragment_program_ref Ogre/Depth/DownscaleMax_ps |
|||
{ |
|||
} |
|||
|
|||
texture_unit depthTexture |
|||
{ |
|||
filtering none |
|||
tex_address_mode clamp |
|||
} |
|||
} |
|||
} |
|||
} |
@ -0,0 +1,117 @@ |
|||
// GLSL variant of the horizontal ESM Gaussian log filter pass
// (HORIZONTAL_STEP=1, VERTICAL_STEP=0).
fragment_program ESM/GaussianLogFilterH_ps_GLSL glsl
{
	source GaussianBlurLogFilter_ps.glsl
	// Each macro is defined exactly once, matching the HLSL and Metal variants.
	preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=1,VERTICAL_STEP=0
	default_params
	{
		// Bind the sampler named "tex" to texture unit 0
		param_named tex int 0
	}
}
|||
|
|||
fragment_program ESM/GaussianLogFilterV_ps_GLSL glsl : ESM/GaussianLogFilterH_ps_GLSL |
|||
{ |
|||
preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=0,VERTICAL_STEP=1 |
|||
} |
|||
|
|||
// Vulkan (glslvk) variant of the horizontal ESM Gaussian log filter pass
// (HORIZONTAL_STEP=1, VERTICAL_STEP=0).
fragment_program ESM/GaussianLogFilterH_ps_VK glslvk
{
	source GaussianBlurLogFilter_ps.glsl
	// Each macro is defined exactly once, matching the HLSL and Metal variants.
	preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=1,VERTICAL_STEP=0
}
|||
|
|||
fragment_program ESM/GaussianLogFilterV_ps_VK glslvk : ESM/GaussianLogFilterH_ps_VK |
|||
{ |
|||
preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=0,VERTICAL_STEP=1 |
|||
} |
|||
|
|||
fragment_program ESM/GaussianLogFilterH_ps_HLSL hlsl |
|||
{ |
|||
source GaussianBlurLogFilter_ps.hlsl |
|||
entry_point main |
|||
target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3 |
|||
preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=1,VERTICAL_STEP=0 |
|||
} |
|||
|
|||
fragment_program ESM/GaussianLogFilterV_ps_HLSL hlsl : ESM/GaussianLogFilterH_ps_HLSL |
|||
{ |
|||
preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=0,VERTICAL_STEP=1 |
|||
} |
|||
|
|||
fragment_program ESM/GaussianLogFilterH_ps_Metal metal |
|||
{ |
|||
source GaussianBlurLogFilter_ps.metal |
|||
shader_reflection_pair_hint Ogre/Compositor/Quad_vs |
|||
preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=1,VERTICAL_STEP=0 |
|||
} |
|||
|
|||
fragment_program ESM/GaussianLogFilterV_ps_Metal metal : ESM/GaussianLogFilterH_ps_Metal |
|||
{ |
|||
preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=0,VERTICAL_STEP=1 |
|||
} |
|||
|
|||
fragment_program ESM/GaussianLogFilterH_ps unified |
|||
{ |
|||
delegate ESM/GaussianLogFilterH_ps_GLSL |
|||
delegate ESM/GaussianLogFilterH_ps_VK |
|||
delegate ESM/GaussianLogFilterH_ps_HLSL |
|||
delegate ESM/GaussianLogFilterH_ps_Metal |
|||
|
|||
default_params |
|||
{ |
|||
param_named weights float9 0.013960189 0.022308320 0.033488754 0.047226712 0.062565230 0.077863686 0.091031872 0.099978946 0.10315263 |
|||
} |
|||
} |
|||
|
|||
fragment_program ESM/GaussianLogFilterV_ps unified |
|||
{ |
|||
delegate ESM/GaussianLogFilterV_ps_GLSL |
|||
delegate ESM/GaussianLogFilterV_ps_VK |
|||
delegate ESM/GaussianLogFilterV_ps_HLSL |
|||
delegate ESM/GaussianLogFilterV_ps_Metal |
|||
|
|||
default_params |
|||
{ |
|||
param_named weights float9 0.013960189 0.022308320 0.033488754 0.047226712 0.062565230 0.077863686 0.091031872 0.099978946 0.10315263 |
|||
} |
|||
} |
|||
|
|||
material ESM/GaussianLogFilterH |
|||
{ |
|||
technique |
|||
{ |
|||
pass |
|||
{ |
|||
depth_check off |
|||
depth_write off |
|||
|
|||
cull_hardware none |
|||
|
|||
vertex_program_ref Ogre/Compositor/Quad_vs |
|||
{ |
|||
} |
|||
|
|||
fragment_program_ref ESM/GaussianLogFilterH_ps |
|||
{ |
|||
} |
|||
|
|||
texture_unit tex |
|||
{ |
|||
filtering none |
|||
tex_address_mode clamp |
|||
} |
|||
} |
|||
} |
|||
} |
|||
|
|||
material ESM/GaussianLogFilterV : ESM/GaussianLogFilterH |
|||
{ |
|||
technique |
|||
{ |
|||
pass |
|||
{ |
|||
fragment_program_ref ESM/GaussianLogFilterV_ps |
|||
{ |
|||
} |
|||
} |
|||
} |
|||
} |
@ -0,0 +1,101 @@ |
|||
{ |
|||
"samplers" : |
|||
{ |
|||
"PointClamp" : |
|||
{ |
|||
"min" : "point", |
|||
"mag" : "point", |
|||
"mip" : "point", |
|||
"u" : "clamp", |
|||
"v" : "clamp", |
|||
"w" : "clamp", |
|||
"miplodbias" : 0, |
|||
"max_anisotropic" : 1, |
|||
"compare_function" : "disabled", |
|||
"border" : [1, 1, 1, 1], |
|||
"min_lod" : -3.40282347E+38, |
|||
"max_lod" : 3.40282347E+38 |
|||
} |
|||
}, |
|||
|
|||
"compute" : |
|||
{ |
|||
"ESM/GaussianLogFilterH" : |
|||
{ |
|||
"threads_per_group" : [32, 2, 1], |
|||
"thread_groups" : [8, 512, 1], |
|||
|
|||
"source" : "GaussianBlurLogFilterBase_cs", |
|||
"pieces" : ["EsmGaussianBlurLogFilter_cs"], |
|||
"inform_shader_of_texture_data_change" : true, |
|||
|
|||
"uav_units" : 1, |
|||
|
|||
"textures" : |
|||
[ |
|||
{ |
|||
"sampler" : "PointClamp" |
|||
} |
|||
], |
|||
|
|||
"params" : |
|||
[ |
|||
["g_f4OutputSize", "packed_texture_size", 0], |
|||
["c_weights[0]", [0.013960189, 0.022308320, 0.033488754, 0.047226712, |
|||
0.062565230, 0.077863686, 0.091031872, 0.099978946, 0.10315263]] |
|||
], |
|||
|
|||
"params_glsl" : |
|||
[ |
|||
["inputImage", [0], "int"], |
|||
["outputImage", [0], "int"] |
|||
], |
|||
|
|||
"properties" : |
|||
{ |
|||
"horizontal_pass" : 1, |
|||
"kernel_radius" : 8, |
|||
"K" : 80 |
|||
} |
|||
}, |
|||
|
|||
"ESM/GaussianLogFilterV" : |
|||
{ |
|||
"threads_per_group" : [32, 2, 1], |
|||
"thread_groups" : [512, 8, 1], |
|||
|
|||
"source" : "GaussianBlurLogFilterBase_cs", |
|||
"pieces" : ["EsmGaussianBlurLogFilter_cs"], |
|||
"inform_shader_of_texture_data_change" : true, |
|||
|
|||
"uav_units" : 1, |
|||
|
|||
"textures" : |
|||
[ |
|||
{ |
|||
"sampler" : "PointClamp" |
|||
} |
|||
], |
|||
|
|||
"params" : |
|||
[ |
|||
["g_f4OutputSize", "packed_texture_size", 0], |
|||
["c_weights[0]", [0.013960189, 0.022308320, 0.033488754, 0.047226712, |
|||
0.062565230, 0.077863686, 0.091031872, 0.099978946, 0.10315263]] |
|||
], |
|||
|
|||
"params_glsl" : |
|||
[ |
|||
["inputImage", [0], "int"], |
|||
["outputImage", [0], "int"] |
|||
], |
|||
|
|||
"properties" : |
|||
{ |
|||
"horizontal_pass" : 0, |
|||
"kernel_radius" : 8, |
|||
"K" : 80 |
|||
} |
|||
} |
|||
} |
|||
} |
@ -0,0 +1,18 @@ |
|||
#version ogre_glsl_ver_330 |
|||
|
|||
vulkan_layout( ogre_t0 ) uniform texture2D tex; |
|||
vulkan( layout( ogre_s0 ) uniform sampler texSampler ); |
|||
|
|||
vulkan_layout( location = 0 ) |
|||
in block |
|||
{ |
|||
vec2 uv0; |
|||
} inPs; |
|||
|
|||
vulkan_layout( location = 0 ) |
|||
out float fragColour; |
|||
|
|||
void main()
{
	// Sample the source once and forward only its first channel
	vec4 srcTexel = texture( vkSampler2D( tex, texSampler ), inPs.uv0 );
	fragColour = srcTexel.x;
}
@ -0,0 +1,22 @@ |
|||
#version ogre_glsl_ver_330 |
|||
|
|||
vulkan_layout( ogre_t0 ) uniform texture2DArray tex; |
|||
vulkan( layout( ogre_s0 ) uniform sampler texSampler ); |
|||
|
|||
vulkan( layout( ogre_P0 ) uniform Params { ) |
|||
uniform float sliceIdx; |
|||
vulkan( }; ) |
|||
|
|||
vulkan_layout( location = 0 ) |
|||
in block |
|||
{ |
|||
vec2 uv0; |
|||
} inPs; |
|||
|
|||
vulkan_layout( location = 0 ) |
|||
out vec4 fragColour; |
|||
|
|||
void main()
{
	// uv0 addresses the texel inside the layer, sliceIdx selects the array layer
	vec3 arrayCoord = vec3( inPs.uv0, sliceIdx );
	fragColour = texture( vkSampler2DArray( tex, texSampler ), arrayCoord );
}
@ -0,0 +1,18 @@ |
|||
#version ogre_glsl_ver_330 |
|||
|
|||
vulkan_layout( ogre_t0 ) uniform texture2D tex; |
|||
vulkan( layout( ogre_s0 ) uniform sampler texSampler ); |
|||
|
|||
vulkan_layout( location = 0 ) |
|||
in block |
|||
{ |
|||
vec2 uv0; |
|||
} inPs; |
|||
|
|||
vulkan_layout( location = 0 ) |
|||
out vec4 fragColour; |
|||
|
|||
void main()
{
	// Straight copy of all four channels of the source texel
	vec4 srcTexel = texture( vkSampler2D( tex, texSampler ), inPs.uv0 );
	fragColour = srcTexel;
}
@ -0,0 +1,28 @@ |
|||
#version ogre_glsl_ver_330 |
|||
|
|||
vulkan( layout( ogre_P0 ) uniform Params { ) |
|||
uniform float lodLevel; |
|||
vulkan( }; ) |
|||
|
|||
vulkan_layout( ogre_t0 ) uniform textureCube cubeTexture; |
|||
vulkan( layout( ogre_s0 ) uniform sampler cubeSampler ); |
|||
|
|||
vulkan_layout( location = 0 ) |
|||
in block |
|||
{ |
|||
vec2 uv0; |
|||
} inPs; |
|||
|
|||
vulkan_layout( location = 0 ) |
|||
out vec4 fragColour; |
|||
|
|||
void main()
{
	// Each horizontal half of the target spans its own [-1; 1] range in x,
	// while y spans [-1; 1] over the whole height
	vec2 planePos = vec2( mod( inPs.uv0.x, 0.5 ) * 4.0 - 1.0,
						  inPs.uv0.y * 2.0 - 1.0 );

	float dirZ = 0.5 - 0.5 * (planePos.x * planePos.x + planePos.y * planePos.y);

	// uv0.x < 0.5 keeps dirZ as computed; the other half flips its sign
	if( !(inPs.uv0.x < 0.5) )
		dirZ = -dirZ;

	vec3 cubeDir = vec3( planePos, dirZ );

	fragColour.xyzw = textureLod( vkSamplerCube( cubeTexture, cubeSampler ), cubeDir.xyz, lodLevel ).xyzw;
}
@ -0,0 +1,37 @@ |
|||
#version ogre_glsl_ver_330 |
|||
|
|||
vulkan_layout( ogre_t0 ) uniform textureCube depthTexture; |
|||
vulkan( layout( ogre_s0 ) uniform sampler cubeSampler ); |
|||
|
|||
vulkan_layout( location = 0 ) |
|||
in block |
|||
{ |
|||
vec2 uv0; |
|||
} inPs; |
|||
|
|||
in vec4 gl_FragCoord; |
|||
//out float gl_FragDepth; |
|||
|
|||
#ifdef OUTPUT_TO_COLOUR |
|||
vulkan_layout( location = 0 ) |
|||
out float fragColour; |
|||
#endif |
|||
|
|||
void main()
{
	// uv0.x < 0.5 keeps the computed z; the other half of the target negates it
	bool keepSign = inPs.uv0.x < 0.5;

	vec3 cubeDir;
	cubeDir.x = mod( inPs.uv0.x, 0.5 ) * 4.0 - 1.0;
	cubeDir.y = inPs.uv0.y * 2.0 - 1.0;

	float dirZ = 0.5 - 0.5 * (cubeDir.x * cubeDir.x + cubeDir.y * cubeDir.y);
	cubeDir.z = keepSign ? dirZ : -dirZ;

	// Always read mip 0 of the cubemap
	float depthValue = textureLod( vkSamplerCube( depthTexture, cubeSampler ), cubeDir.xyz, 0 ).x;

	// The value goes either to a colour target or straight to the depth buffer
#ifdef OUTPUT_TO_COLOUR
	fragColour = depthValue;
#else
	gl_FragDepth = depthValue;
#endif
}
@ -0,0 +1,17 @@ |
|||
#version ogre_glsl_ver_330 |
|||
|
|||
vulkan_layout( ogre_t0 ) uniform texture2D depthTexture; |
|||
|
|||
in vec4 gl_FragCoord; |
|||
//out float gl_FragDepth; |
|||
|
|||
void main()
{
	// First texel of the 2x2 source footprint covered by this output pixel
	ivec2 srcBase = ivec2(gl_FragCoord.xy * 2.0);

	float depth00 = texelFetch( depthTexture, srcBase, 0 ).x;
	float depth01 = texelFetch( depthTexture, srcBase + ivec2( 0, 1 ), 0 ).x;
	float depth10 = texelFetch( depthTexture, srcBase + ivec2( 1, 0 ), 0 ).x;
	float depth11 = texelFetch( depthTexture, srcBase + ivec2( 1, 1 ), 0 ).x;

	// Max filter: keep the largest of the four depths
	float maxOfFirstPair = max( depth00, depth01 );
	float maxOfSecondPair = max( depth10, depth11 );
	gl_FragDepth = max( maxOfFirstPair, maxOfSecondPair );
}
@ -0,0 +1,22 @@ |
|||
//Based on GPUOpen's samples SeparableFilter11 |
|||
//https://github.com/GPUOpen-LibrariesAndSDKs/SeparableFilter11 |
|||
//For better understanding, read "Efficient Compute Shader Programming" from Bill Bilodeau |
|||
//http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/Efficient%20Compute%20Shader%20Programming.pps |
|||
|
|||
//TL;DR: |
|||
// * Each thread works on 4 pixels at a time (for VLIW hardware, i.e. Radeon HD 5000 & 6000 series). |
|||
// * 256 pixels per threadgroup. Each threadgroup works on 2 rows of 128 pixels each. |
|||
// That means 32x2 threads = 64. 64 threads x 4 pixels per thread = 256 |
|||
|
|||
@piece( data_type )float@end |
|||
@piece( lds_data_type )float@end |
|||
@piece( lds_definition )shared float g_f3LDS[ 2 ] [ @value( samples_per_threadgroup ) ];@end |
|||
|
|||
@piece( image_sample ) |
|||
return textureLod( vkSampler2D( inputImage, inputSampler ), f2SamplePosition, 0 ).x; |
|||
@end |
|||
|
|||
@piece( image_store ) |
|||
@foreach( 4, iPixel ) |
|||
imageStore( outputImage, ivec2( i2Center + @iPixel * i2Inc ), vec4( outColour[ @iPixel ], 0, 0, 1.0 ) );@end |
|||
@end |
@ -0,0 +1,263 @@ |
|||
@property( syntax != glslvk ) |
|||
#version 430 |
|||
@else |
|||
#version 450 |
|||
@end |
|||
|
|||
//Based on GPUOpen's samples SeparableFilter11 |
|||
//https://github.com/GPUOpen-LibrariesAndSDKs/SeparableFilter11 |
|||
//For better understanding, read "Efficient Compute Shader Programming" from Bill Bilodeau |
|||
//http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/Efficient%20Compute%20Shader%20Programming.pps |
|||
|
|||
//TL;DR: |
|||
// * Each thread works on 4 pixels at a time (for VLIW hardware, i.e. Radeon HD 5000 & 6000 series). |
|||
// * 256 pixels per threadgroup. Each threadgroup works on 2 rows of 128 pixels each. |
|||
// That means 32x2 threads = 64. 64 threads x 4 pixels per thread = 256 |
|||
|
|||
// For this shader to work, several pieces need to be defined: |
|||
// data_type (e.g. vec3) |
|||
// lds_data_type (e.g. vec3, uint) |
|||
// lds_definition |
|||
// image_store |
|||
// image_sample |
|||
// decode_lds (optional, i.e. when lds_data_type != data_type) |
|||
// Define the property "downscale" if you're doing a downsample. |
|||
// Define "downscale_lq" (must also define downscale) for SLIGHTLY lower quality downscale |
|||
// The script uses the template syntax to automatically set the num. of threadgroups |
|||
// based on the bound input texture. |
|||
|
|||
vulkan( layout( ogre_s0 ) uniform sampler inputSampler ); |
|||
@property( texture0_texture_type == TextureTypes_Type2DArray ) |
|||
vulkan_layout( ogre_t0 ) uniform texture2DArray inputImage; |
|||
@else |
|||
vulkan_layout( ogre_t0 ) uniform texture2D inputImage; |
|||
@end |
|||
|
|||
layout( vulkan( ogre_u0 ) vk_comma @insertpiece(uav0_pf_type) ) |
|||
@property( uav0_texture_type == TextureTypes_Type2DArray ) |
|||
uniform restrict writeonly image2DArray outputImage; |
|||
@else |
|||
uniform restrict writeonly image2D outputImage; |
|||
@end |
|||
|
|||
// 32 = 128 / 4 |
|||
layout( local_size_x = 32, |
|||
local_size_y = 2, |
|||
local_size_z = 1 ) in; |
|||
@pset( threads_per_group_x, 32 ) |
|||
@pset( threads_per_group_y, 2 ) |
|||
@pset( threads_per_group_z, 1 ) |
|||
|
|||
@pmul( pixelsPerRow, threads_per_group_x, 4 ) |
|||
@pset( rowsPerThreadGroup, threads_per_group_y ) |
|||
@pset( num_thread_groups_z, 1 ) |
|||
|
|||
@set( input_width, uav0_width_with_lod ) |
|||
@set( input_height, uav0_height_with_lod ) |
|||
|
|||
@property( horizontal_pass ) |
|||
@property( downscale ) @mul( input_width, 2 ) @end |
|||
|
|||
/// Calculate num_thread_groups_ |
|||
/// num_thread_groups_x = (input_width + pixelsPerRow - 1) / pixelsPerRow |
|||
/// num_thread_groups_y = (input_height + rowsPerThreadGroup - 1) / rowsPerThreadGroup |
|||
@add( num_thread_groups_x, input_width, pixelsPerRow ) |
|||
@sub( num_thread_groups_x, 1 ) |
|||
@div( num_thread_groups_x, pixelsPerRow ) |
|||
|
|||
@add( num_thread_groups_y, input_height, rowsPerThreadGroup ) |
|||
@sub( num_thread_groups_y, 1 ) |
|||
@div( num_thread_groups_y, rowsPerThreadGroup ) |
|||
@end @property( !horizontal_pass ) |
|||
@property( downscale ) @mul( input_height, 2 ) @end |
|||
|
|||
/// Calculate num_thread_groups_ |
|||
/// num_thread_groups_x = (input_width + rowsPerThreadGroup - 1) / rowsPerThreadGroup |
|||
/// num_thread_groups_y = (input_height + pixelsPerRow - 1) / pixelsPerRow |
|||
@add( num_thread_groups_x, input_width, rowsPerThreadGroup ) |
|||
@sub( num_thread_groups_x, 1 ) |
|||
@div( num_thread_groups_x, rowsPerThreadGroup ) |
|||
|
|||
@add( num_thread_groups_y, input_height, pixelsPerRow ) |
|||
@sub( num_thread_groups_y, 1 ) |
|||
@div( num_thread_groups_y, pixelsPerRow ) |
|||
@end |
|||
|
|||
/// shared vec3 g_f3LDS[ 2 ] [ @value( samples_per_threadgroup ) ]; |
|||
@insertpiece( lds_definition ) |
|||
|
|||
#define C_WEIGHTS( x ) c_weights[(x) >> 2u][(x) & 3u] |
|||
|
|||
// weights_array_count = ( kernel_radius + 1u + 3u ) / 4u |
|||
@padd( weights_array_count, kernel_radius, 4 ) |
|||
@pdiv( weights_array_count, 4 ) |
|||
|
|||
vulkan( layout( ogre_P0 ) uniform Params { ) |
|||
uniform vec4 g_f4OutputSize; |
|||
uniform vec4 c_weights[@value( weights_array_count )]; |
|||
@insertpiece( extra_params ) |
|||
vulkan( }; ) |
|||
|
|||
// Reads the input image at the texel i2Position and returns it in the LDS
// storage type. The actual fetch (and the return statement) comes from the
// image_sample piece, which reads the local f2SamplePosition by name.
// NOTE: f2Offset is accepted but not used by the code below.
@insertpiece( lds_data_type ) sampleTex( ivec2 i2Position , vec2 f2Offset )
{
	// Texel centre scaled by g_f4OutputSize.zw
	// (presumably 1 / texture size, i.e. normalized UVs - confirm against the caller)
	vec2 f2SamplePosition = ( vec2( i2Position ) + vec2( 0.5f, 0.5f ) ) * g_f4OutputSize.zw;

	///return textureLod( inputImage, f2SamplePosition, 0 ).xyz;
	@insertpiece( image_sample )
}
|||
|
|||
// Filters the 4 pixels handled by this thread (2 when downscale_lq is set) using the |
// samples cached in one row of g_f3LDS, then hands the results to the image_store piece. |
//   iPixelOffset: index in g_f3LDS[ iLineOffset ] of the first (left-most) tap of this thread's first pixel |
//   iLineOffset:  row of g_f3LDS to read from |
//   i2Center, i2Inc: not read by the code below; they are consumed by the image_store piece |
void ComputeFilterKernel( int iPixelOffset, int iLineOffset, ivec2 i2Center, ivec2 i2Inc ) |
|||
{ |
|||
@property( !downscale_lq ) |
|||
@insertpiece( data_type ) outColour[ 4 ]; |
|||
@end @property( downscale_lq ) |
|||
@insertpiece( data_type ) outColour[ 2 ]; |
|||
@end |
|||
// RDI is a 4-sample window over the LDS row, one entry per pixel |
@insertpiece( data_type ) RDI[ 4 ] ; |
|||
|
|||
// Load the centre sample of each pixel (kernel_radius entries past the first tap) |
@foreach( 4, iPixel ) |
|||
RDI[ @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @value( kernel_radius ) + @iPixel ] );@end |
|||
|
|||
// Start the accumulation with the centre tap, weighted by C_WEIGHTS( kernel_radius ) |
@property( !downscale_lq ) |
|||
@foreach( 4, iPixel ) |
|||
outColour[ @iPixel ] = RDI[ @iPixel ] * C_WEIGHTS( @value( kernel_radius ) );@end |
|||
@end @property( downscale_lq ) |
|||
@foreach( 2, iPixel ) |
|||
outColour[ @iPixel ] = RDI[ @iPixel * 2 ] * C_WEIGHTS( @value( kernel_radius ) );@end |
|||
@end |
|||
|
|||
// Reload the window with the first tap of each pixel |
@foreach( 4, iPixel ) |
|||
RDI[ @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @iPixel ] );@end |
|||
|
|||
// From here on iPixelOffset points at the next sample to shift into the window |
iPixelOffset += 4; |
|||
|
|||
/// Deal with taps to our left. |
|||
/// for ( iIteration = 0; iIteration < radius; iIteration += 1 ) |
|||
@foreach( kernel_radius, iIteration ) |
|||
@property( !downscale_lq ) |
|||
@foreach( 4, iPixel ) |
|||
outColour[ @iPixel ] += RDI[ @iPixel ] * C_WEIGHTS( @iIteration );@end |
|||
@end @property( downscale_lq ) |
|||
@foreach( 2, iPixel ) |
|||
outColour[ @iPixel ] += RDI[ @iPixel * 2 ] * C_WEIGHTS( @iIteration );@end |
|||
@end |
|||
@foreach( 3, iPixel ) |
|||
RDI[ @iPixel ] = RDI[ @iPixel + ( 1 ) ];@end |
|||
@foreach( 1, iPixel ) |
|||
RDI[ 4 - 1 + @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @iIteration + @iPixel ] );@end |
|||
@end |
|||
|
|||
// Reload the window with the first sample to the right of each pixel's centre |
// (iPixelOffset was advanced by 4 above, hence the "- 4") |
@foreach( 4, iPixel ) |
|||
RDI[ @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset - 4 + @value( kernel_radius ) + 1 + @iPixel ] );@end |
|||
|
|||
@padd( kernel_radius_plus1, kernel_radius, 1 ) |
|||
@pmul( kernel_radius2x_plus1, kernel_radius, 2 ) |
|||
@padd( kernel_radius2x_plus1, 1 ) |
|||
|
|||
@pmul( kernel_radius2x, kernel_radius, 2 ) |
|||
|
|||
/// Deal with taps to our right. |
|||
/// for ( iIteration = radius + 1; iIteration < ( radius * 2 + 1 ); iIteration += 1 ) |
|||
// The weight index is mirrored: tap iIteration uses C_WEIGHTS( 2 * kernel_radius - iIteration ) |
@foreach( kernel_radius2x_plus1, iIteration, kernel_radius_plus1 ) |
|||
@property( !downscale_lq ) |
|||
@foreach( 4, iPixel ) |
|||
outColour[ @iPixel ] += RDI[ @iPixel ] * C_WEIGHTS( @value( kernel_radius2x ) - @iIteration );@end |
|||
@end @property( downscale_lq ) |
|||
@foreach( 2, iPixel ) |
|||
outColour[ @iPixel ] += RDI[ @iPixel * 2 ] * C_WEIGHTS( @value( kernel_radius2x ) - @iIteration );@end |
|||
@end |
|||
@foreach( 3, iPixel ) |
|||
RDI[ @iPixel ] = RDI[ @iPixel + ( 1 ) ];@end |
|||
@foreach( 1, iPixel ) |
|||
RDI[ 4 - 1 + @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @iIteration + @iPixel ] );@end |
|||
@end |
|||
|
|||
/* |
|||
foreach( 4, iPixel ) |
|||
imageStore( outputImage, ivec2( i2Center + iPixel * i2Inc ), vec4( outColour[ iPixel ], 1.0 ) );end |
|||
*/ |
|||
// Writing outColour to the output image is delegated to the image_store piece |
@insertpiece( image_store ) |
|||
} |
|||
|
|||
// Compute entry point. Each invocation first copies its share of source samples
// into g_f3LDS, waits for the whole workgroup, then filters its own pixels via
// ComputeFilterKernel. The horizontal and vertical passes are the same code with
// the x/y roles swapped; exactly one of them is emitted by the template.
void main() |
|||
{ |
|||
/// samples_per_threadgroup = 128 + ( ( kernel_radius * 2 + 1 ) - 1 ) |
|||
/// samples_per_thread = ( 128 + ( ( kernel_radius * 2 + 1 ) - 1 ) ) / ( 128 / 4 ) |
|||
@padd( samples_per_threadgroup, 127, kernel_radius2x_plus1 ) |
|||
@pdiv( samples_per_thread, samples_per_threadgroup, 32 ) |
|||
|
|||
@property( horizontal_pass ) |
|||
int iSampleOffset = int( gl_LocalInvocationID.x * @value( samples_per_thread ) ); |
|||
int iLineOffset = int( gl_LocalInvocationID.y ); |
|||
|
|||
// Group origin: 128 pixels per group along x (<< 7), shifted left by
// kernel_radius so the row also covers the left-hand taps; 2 rows per group (<< 1).
ivec2 i2GroupCoord = ivec2( ( gl_WorkGroupID.x << 7u ) - @value( kernel_radius )u, gl_WorkGroupID.y << 1u ); |
|||
ivec2 i2Coord = ivec2( i2GroupCoord.x + iSampleOffset, i2GroupCoord.y ); |
|||
|
|||
// Every invocation loads samples_per_thread consecutive samples of its row.
@foreach( samples_per_thread, i ) |
|||
g_f3LDS[ iLineOffset ][ iSampleOffset + @i ] = sampleTex( i2Coord + ivec2( @i, gl_LocalInvocationID.y ) , vec2( 0.5f, 0.0f ) );@end |
|||
|
|||
// Samples left over by the division above are loaded by the first few
// invocations, one each, filling the row from its last entry backwards.
if( gl_LocalInvocationID.x < @value( samples_per_threadgroup )u - 32u * @value( samples_per_thread )u ) |
|||
{ |
|||
g_f3LDS[ iLineOffset ][ @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x ] = |
|||
sampleTex( i2GroupCoord + ivec2( @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x, gl_LocalInvocationID.y ), vec2( 0.5f, 0.0f ) ); |
|||
} |
|||
|
|||
//memoryBarrierShared ensures our write is visible to everyone else (must be done BEFORE the barrier) |
|||
//barrier ensures every thread's execution reached here. |
|||
memoryBarrierShared(); |
|||
barrier(); |
|||
|
|||
int iPixelOffset = int( gl_LocalInvocationID.x << 2u ); //gl_LocalInvocationID.x * 4u |
|||
i2Coord = ivec2( i2GroupCoord.x + iPixelOffset, i2GroupCoord.y ); |
|||
// Undo the kernel_radius shift applied to i2GroupCoord: i2Coord is now the
// first pixel this invocation filters.
i2Coord.x += @value( kernel_radius ); |
|||
|
|||
// Invocations whose first pixel lies past g_f4OutputSize.x do no filtering.
if( i2Coord.x < int(g_f4OutputSize.x) ) |
|||
{ |
|||
ivec2 i2Center = i2Coord + ivec2( 0, gl_LocalInvocationID.y ); |
|||
ivec2 i2Inc = ivec2 ( 1, 0 ); |
|||
|
|||
// When downscaling, the coordinate along the filtered axis is halved.
@property( downscale ) |
|||
i2Center.x = int( uint( i2Center.x ) >> 1u ); |
|||
@end |
|||
|
|||
ComputeFilterKernel( iPixelOffset, iLineOffset, i2Center, i2Inc ); |
|||
} |
|||
@end @property( !horizontal_pass ) |
|||
// Vertical pass: same steps as above with x and y exchanged.
int iSampleOffset = int( gl_LocalInvocationID.x * @value( samples_per_thread ) ); |
|||
int iLineOffset = int( gl_LocalInvocationID.y ); |
|||
|
|||
ivec2 i2GroupCoord = ivec2( gl_WorkGroupID.x << 1u, ( gl_WorkGroupID.y << 7u ) - @value( kernel_radius )u ); |
|||
ivec2 i2Coord = ivec2( i2GroupCoord.x, i2GroupCoord.y + iSampleOffset ); |
|||
|
|||
@foreach( samples_per_thread, i ) |
|||
g_f3LDS[ iLineOffset ][ iSampleOffset + @i ] = sampleTex( i2Coord + ivec2( gl_LocalInvocationID.y, @i ) , vec2( 0.0f, 0.5f ) );@end |
|||
|
|||
if( gl_LocalInvocationID.x < @value( samples_per_threadgroup )u - 32u * @value( samples_per_thread )u ) |
|||
{ |
|||
g_f3LDS[ iLineOffset ][ @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x ] = |
|||
sampleTex( i2GroupCoord + ivec2( gl_LocalInvocationID.y, @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x ), vec2( 0.0f, 0.5f ) ); |
|||
} |
|||
|
|||
//memoryBarrierShared ensures our write is visible to everyone else (must be done BEFORE the barrier) |
|||
//barrier ensures every thread's execution reached here. |
|||
memoryBarrierShared(); |
|||
barrier(); |
|||
|
|||
int iPixelOffset = int( gl_LocalInvocationID.x << 2u ); //gl_LocalInvocationID.x * 4u |
|||
i2Coord = ivec2( i2GroupCoord.x, i2GroupCoord.y + iPixelOffset ); |
|||
i2Coord.y += @value( kernel_radius ); |
|||
|
|||
if( i2Coord.y < int(g_f4OutputSize.y) ) |
|||
{ |
|||
ivec2 i2Center = i2Coord + ivec2( gl_LocalInvocationID.y, 0 ); |
|||
ivec2 i2Inc = ivec2 ( 0, 1 ); |
|||
|
|||
@property( downscale ) |
|||
i2Center.y = int( uint( i2Center.y ) >> 1u ); |
|||
@end |
|||
|
|||
ComputeFilterKernel( iPixelOffset, iLineOffset, i2Center, i2Inc ); |
|||
} |
|||
@end |
|||
} |
@ -0,0 +1,285 @@ |
|||
@property( syntax != glslvk ) |
|||
#version 430 |
|||
@else |
|||
#version 450 |
|||
@end |
|||
|
|||
//See GaussianBlurBase_cs for the original. |
|||
//This is a derived version which is used for filtering ESM (Exponential Shadow Maps). |
|||
//Normally ESM is in exponential space: exp( K * linearSpaceDepth ); |
|||
//Filtering should be done in that space. |
|||
//However because of precision reasons, we store linearSpaceDepth instead. In order to perform |
|||
//correct filtering, we use the following formula: |
|||
// exp( filteredDepth ) = w0 * exp( d0 ) + w1 * exp( d1 ) + w2 * exp( d2 ) + ... |
|||
// |
|||
//But this is not precision friendly. So we do instead: |
|||
// = w0 * exp( d0 ) + w1 * exp( d1 ) + w2 * exp( d2 ) |
|||
// = exp( d0 ) * ( w0 + w1 * exp( d1 ) / exp( d0 ) + w2 * exp( d2 ) / exp( d0 ) ) |
|||
// = exp( d0 ) * ( w0 + w1 * exp( d1 - d0 ) + w2 * exp( d2 - d0 ) ) |
|||
// = exp( d0 ) * exp( log( w0 + w1 * exp( d1 - d0 ) + w2 * exp( d2 - d0 ) ) ) |
|||
// = exp( d0 + log( w0 + w1 * exp( d1 - d0 ) + w2 * exp( d2 - d0 ) ) ) |
|||
// exp( filteredDepth ) = exp( d0 + log( w0 + w1 * exp( d1 - d0 ) + w2 * exp( d2 - d0 ) ) ) |
|||
//Almost final formula: |
|||
// filteredDepth = d0 + log( w0 + w1 * exp( d1 - d0 ) + w2 * exp( d2 - d0 ) ) |
|||
// |
|||
//The formula is actually: |
|||
// exp( K * filteredDepth ) = w0 * exp( K * d0 ) + w1 * exp( K * d1 ) + w2 * exp( K * d2 ) + ... |
|||
//Final formula: |
|||
// = d0 + log( w0 + w1 * exp( K * (d1 - d0) ) + w2 * exp( K * (d2 - d0) ) ) / K |
|||
|
|||
//Like in the original filter: |
|||
// * Each thread works on 4 pixels at a time (for VLIW hardware, i.e. Radeon HD 5000 & 6000 series). |
|||
// * 256 pixels per threadgroup. Each threadgroup works on 2 rows of 128 pixels each. |
|||
// That means 32x2 threads = 64. 64 threads x 4 pixels per thread = 256 |
|||
|
|||
// For this shader to work, several pieces need to be defined: |
|||
// data_type (i.e. vec3) |
|||
// lds_data_type (i.e. vec3, uint) |
|||
// lds_definition |
|||
// image_store |
|||
// image_sample |
|||
// decode_lds (optional, i.e. when lds_data_type != data_type) |
|||
// Define the property "downscale" if you're doing a downsample. |
|||
// Define "downscale_lq" (must also define downscale) for SLIGHTLY lower quality downscale |
|||
// The script uses the template syntax to automatically set the num. of threadgroups |
|||
// based on the bound input texture. |
|||
|
|||
// Resources: the source texture (plus a separate sampler declared through the
// vulkan() macro) and the write-only destination image.
vulkan( layout( ogre_s0 ) uniform sampler inputSampler ); |
|||
vulkan_layout( ogre_t0 ) uniform texture2D inputImage; |
|||
|
|||
layout( vulkan( ogre_u0 ) vk_comma @insertpiece(uav0_pf_type) ) |
|||
uniform restrict writeonly image2D outputImage; |
|||
|
|||
// 32 = 128 / 4 |
|||
layout( local_size_x = 32, |
|||
local_size_y = 2, |
|||
local_size_z = 1 ) in; |
|||
// Template-side copies of the local size declared just above.
@pset( threads_per_group_x, 32 ) |
|||
@pset( threads_per_group_y, 2 ) |
|||
@pset( threads_per_group_z, 1 ) |
|||
|
|||
// pixelsPerRow = 32 threads * 4 pixels each; rowsPerThreadGroup = 2.
@pmul( pixelsPerRow, threads_per_group_x, 4 ) |
|||
@pset( rowsPerThreadGroup, threads_per_group_y ) |
|||
@pset( num_thread_groups_z, 1 ) |
|||
|
|||
// The dispatch size is derived from the bound UAV (uav0), not from texture 0.
@set( input_width, uav0_width_with_lod ) |
|||
@set( input_height, uav0_height_with_lod ) |
|||
|
|||
@property( horizontal_pass ) |
|||
@property( downscale ) @mul( input_width, 2 ) @end |
|||
|
|||
/// Calculate num_thread_groups_ |
|||
/// num_thread_groups_x = (input_width + pixelsPerRow - 1) / pixelsPerRow |
|||
/// num_thread_groups_y = (input_height + rowsPerThreadGroup - 1) / rowsPerThreadGroup |
|||
@add( num_thread_groups_x, input_width, pixelsPerRow ) |
|||
@sub( num_thread_groups_x, 1 ) |
|||
@div( num_thread_groups_x, pixelsPerRow ) |
|||
|
|||
@add( num_thread_groups_y, input_height, rowsPerThreadGroup ) |
|||
@sub( num_thread_groups_y, 1 ) |
|||
@div( num_thread_groups_y, rowsPerThreadGroup ) |
|||
@end @property( !horizontal_pass ) |
|||
@property( downscale ) @mul( input_height, 2 ) @end |
|||
|
|||
/// Calculate num_thread_groups_ |
|||
/// num_thread_groups_x = (input_width + rowsPerThreadGroup - 1) / rowsPerThreadGroup |
|||
/// num_thread_groups_y = (input_height + pixelsPerRow - 1) / pixelsPerRow |
|||
@add( num_thread_groups_x, input_width, rowsPerThreadGroup ) |
|||
@sub( num_thread_groups_x, 1 ) |
|||
@div( num_thread_groups_x, rowsPerThreadGroup ) |
|||
|
|||
@add( num_thread_groups_y, input_height, pixelsPerRow ) |
|||
@sub( num_thread_groups_y, 1 ) |
|||
@div( num_thread_groups_y, pixelsPerRow ) |
|||
@end |
|||
|
|||
/// shared vec3 g_f3LDS[ 2 ] [ @value( samples_per_threadgroup ) ]; |
|||
@insertpiece( lds_definition ) |
|||
|
|||
// Weights are packed 4 per vec4: x >> 2 selects the vec4, x & 3 the component.
#define C_WEIGHTS( x ) c_weights[(x) >> 2u][(x) & 3u] |
|||
|
|||
// weights_array_count = ( kernel_radius + 1u + 3u ) / 4u |
|||
@padd( weights_array_count, kernel_radius, 4 ) |
|||
@pdiv( weights_array_count, 4 ) |
|||
|
|||
// Shader parameters; the vulkan() macro wraps them in a uniform block.
vulkan( layout( ogre_P0 ) uniform Params { ) |
|||
uniform vec4 g_f4OutputSize; |
|||
uniform vec4 c_weights[@value( weights_array_count )]; |
|||
@insertpiece( extra_params ) |
|||
vulkan( }; ) |
|||
|
|||
// Fetches the source sample for integer position i2Position.
// The position is moved to the texel centre and scaled by g_f4OutputSize.zw
// (presumably the reciprocal of the texture size -- TODO confirm) before the
// image_sample piece performs the actual read and return.
// f2Offset is not used in the visible body; it may be referenced by the piece.
@insertpiece( lds_data_type ) sampleTex( ivec2 i2Position , vec2 f2Offset )
{
	vec2 f2SamplePosition = ( vec2( i2Position ) + vec2( 0.5f, 0.5f ) ) * g_f4OutputSize.zw;

	/// Equivalent plain GLSL: return textureLod( inputImage, f2SamplePosition, 0 ).xyz;
	@insertpiece( image_sample )
}
|||
|
|||
// Log-space variant of the separable filter kernel, used for ESM depth.
// For each output pixel it evaluates
//   d0 + log( w0 + sum( wi * exp( K * (di - d0) ) ) ) / K
// where d0 is the centre sample (kept in firstSmpl) and w0 its weight.
//   iPixelOffset: index of this invocation's first pixel inside the LDS row.
//   iLineOffset:  LDS row to read from.
//   i2Center, i2Inc: not referenced in the visible body; presumably consumed by
//                    the image_store piece -- TODO confirm.
// Produces 4 results per invocation, or 2 when downscale_lq is set.
void ComputeFilterKernel( int iPixelOffset, int iLineOffset, ivec2 i2Center, ivec2 i2Inc ) |
|||
{ |
|||
// Note: firstSmpl is declared with 4 entries in both variants, although the
// downscale_lq path only writes and reads the first 2.
@property( !downscale_lq ) |
|||
@insertpiece( data_type ) outColour[ 4 ]; |
|||
@insertpiece( data_type ) firstSmpl[ 4 ]; |
|||
@end @property( downscale_lq ) |
|||
@insertpiece( data_type ) outColour[ 2 ]; |
|||
@insertpiece( data_type ) firstSmpl[ 4 ]; |
|||
@end |
|||
@insertpiece( data_type ) RDI[ 4 ] ; |
|||
|
|||
// Load the 4 centre samples; they sit kernel_radius entries into the LDS row.
@foreach( 4, iPixel ) |
|||
RDI[ @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @value( kernel_radius ) + @iPixel ] );@end |
|||
|
|||
// Remember the centre sample as d0 and start the sum with the centre weight
// alone, since exp( K * (d0 - d0) ) is 1.
@property( !downscale_lq ) |
|||
@foreach( 4, iPixel ) |
|||
firstSmpl[ @iPixel ].x = RDI[ @iPixel ]; |
|||
outColour[ @iPixel ].x = C_WEIGHTS( @value( kernel_radius ) );@end |
|||
@end @property( downscale_lq ) |
|||
@foreach( 2, iPixel ) |
|||
firstSmpl[ @iPixel ].x = RDI[ @iPixel * 2 ]; |
|||
outColour[ @iPixel ].x = C_WEIGHTS( @value( kernel_radius ) );@end |
|||
@end |
|||
|
|||
// Refill RDI with the 4 leftmost samples of the filter window.
@foreach( 4, iPixel ) |
|||
RDI[ @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @iPixel ] );@end |
|||
|
|||
// Skip past the 4 samples now held in RDI; later loads index relative to this.
iPixelOffset += 4; |
|||
|
|||
/// Deal with taps to our left. |
|||
/// for ( iIteration = 0; iIteration < radius; iIteration += 1 ) |
|||
// Each iteration adds weight * exp( K * (sample - d0) ) for the current
// window, then slides RDI one sample to the right.
@foreach( kernel_radius, iIteration ) |
|||
@property( !downscale_lq ) |
|||
@foreach( 4, iPixel ) |
|||
outColour[ @iPixel ].x += exp(@value(K)*(RDI[ @iPixel ] - firstSmpl[ @iPixel ].x)) * C_WEIGHTS( @iIteration );@end |
|||
@end @property( downscale_lq ) |
|||
@foreach( 2, iPixel ) |
|||
outColour[ @iPixel ].x += exp(@value(K)*(RDI[ @iPixel * 2 ] - firstSmpl[ @iPixel ].x)) * C_WEIGHTS( @iIteration );@end |
|||
@end |
|||
@foreach( 3, iPixel ) |
|||
RDI[ @iPixel ] = RDI[ @iPixel + ( 1 ) ];@end |
|||
@foreach( 1, iPixel ) |
|||
RDI[ 4 - 1 + @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @iIteration + @iPixel ] );@end |
|||
@end |
|||
|
|||
// Refill RDI starting one sample to the right of the centre
// (iPixelOffset was advanced by 4 above, hence the "- 4").
@foreach( 4, iPixel ) |
|||
RDI[ @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset - 4 + @value( kernel_radius ) + 1 + @iPixel ] );@end |
|||
|
|||
// Template counters used by the right-hand loop and by main():
// radius + 1, radius * 2 + 1 and radius * 2.
@padd( kernel_radius_plus1, kernel_radius, 1 ) |
|||
@pmul( kernel_radius2x_plus1, kernel_radius, 2 ) |
|||
@padd( kernel_radius2x_plus1, 1 ) |
|||
|
|||
@pmul( kernel_radius2x, kernel_radius, 2 ) |
|||
|
|||
/// Deal with taps to our right. |
|||
/// for ( iIteration = radius + 1; iIteration < ( radius * 2 + 1 ); iIteration += 1 ) |
|||
// The weight index kernel_radius2x - iIteration runs from radius - 1 down to 0,
// mirroring the weights used for the left-hand taps.
@foreach( kernel_radius2x_plus1, iIteration, kernel_radius_plus1 ) |
|||
@property( !downscale_lq ) |
|||
@foreach( 4, iPixel ) |
|||
outColour[ @iPixel ].x += exp(@value(K)*(RDI[ @iPixel ] - firstSmpl[ @iPixel ].x)) * C_WEIGHTS( @value( kernel_radius2x ) - @iIteration );@end |
|||
@end @property( downscale_lq ) |
|||
@foreach( 2, iPixel ) |
|||
outColour[ @iPixel ].x += exp(@value(K)*(RDI[ @iPixel * 2 ] - firstSmpl[ @iPixel ].x)) * C_WEIGHTS( @value( kernel_radius2x ) - @iIteration );@end |
|||
@end |
|||
@foreach( 3, iPixel ) |
|||
RDI[ @iPixel ] = RDI[ @iPixel + ( 1 ) ];@end |
|||
@foreach( 1, iPixel ) |
|||
RDI[ 4 - 1 + @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @iIteration + @iPixel ] );@end |
|||
@end |
|||
|
|||
// Convert the accumulated sum back to a depth value: d0 + log( sum ) / K.
@property( !downscale_lq ) |
|||
@foreach( 4, iPixel ) |
|||
outColour[ @iPixel ] = firstSmpl[ @iPixel ].x + log( outColour[ @iPixel ].x ) / @value(K);@end |
|||
@end @property( downscale_lq ) |
|||
@foreach( 2, iPixel ) |
|||
outColour[ @iPixel ] = firstSmpl[ @iPixel ].x + log( outColour[ @iPixel ].x ) / @value(K);@end |
|||
@end |
|||
|
|||
/* |
|||
foreach( 4, iPixel ) |
|||
imageStore( outputImage, ivec2( i2Center + iPixel * i2Inc ), vec4( outColour[ iPixel ], 1.0 ) );end |
|||
*/ |
|||
// The actual write of outColour is supplied by the image_store piece.
@insertpiece( image_store ) |
|||
} |
|||
|
|||
// Compute entry point. Each invocation first copies its share of source samples
// into g_f3LDS, waits for the whole workgroup, then filters its own pixels via
// ComputeFilterKernel. The horizontal and vertical passes are the same code with
// the x/y roles swapped; exactly one of them is emitted by the template.
void main() |
|||
{ |
|||
/// samples_per_threadgroup = 128 + ( ( kernel_radius * 2 + 1 ) - 1 ) |
|||
/// samples_per_thread = ( 128 + ( ( kernel_radius * 2 + 1 ) - 1 ) ) / ( 128 / 4 ) |
|||
@padd( samples_per_threadgroup, 127, kernel_radius2x_plus1 ) |
|||
@pdiv( samples_per_thread, samples_per_threadgroup, 32 ) |
|||
|
|||
@property( horizontal_pass ) |
|||
int iSampleOffset = int( gl_LocalInvocationID.x * @value( samples_per_thread ) ); |
|||
int iLineOffset = int( gl_LocalInvocationID.y ); |
|||
|
|||
// Group origin: 128 pixels per group along x (<< 7), shifted left by
// kernel_radius so the row also covers the left-hand taps; 2 rows per group (<< 1).
ivec2 i2GroupCoord = ivec2( ( gl_WorkGroupID.x << 7u ) - @value( kernel_radius )u, gl_WorkGroupID.y << 1u ); |
|||
ivec2 i2Coord = ivec2( i2GroupCoord.x + iSampleOffset, i2GroupCoord.y ); |
|||
|
|||
// Every invocation loads samples_per_thread consecutive samples of its row.
@foreach( samples_per_thread, i ) |
|||
g_f3LDS[ iLineOffset ][ iSampleOffset + @i ] = sampleTex( i2Coord + ivec2( @i, gl_LocalInvocationID.y ) , vec2( 0.5f, 0.0f ) );@end |
|||
|
|||
// Samples left over by the division above are loaded by the first few
// invocations, one each, filling the row from its last entry backwards.
if( gl_LocalInvocationID.x < @value( samples_per_threadgroup )u - 32u * @value( samples_per_thread )u ) |
|||
{ |
|||
g_f3LDS[ iLineOffset ][ @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x ] = |
|||
sampleTex( i2GroupCoord + ivec2( @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x, gl_LocalInvocationID.y ), vec2( 0.5f, 0.0f ) ); |
|||
} |
|||
|
|||
//memoryBarrierShared ensures our write is visible to everyone else (must be done BEFORE the barrier) |
|||
//barrier ensures every thread's execution reached here. |
|||
memoryBarrierShared(); |
|||
barrier(); |
|||
|
|||
int iPixelOffset = int( gl_LocalInvocationID.x << 2u ); //gl_LocalInvocationID.x * 4u |
|||
i2Coord = ivec2( i2GroupCoord.x + iPixelOffset, i2GroupCoord.y ); |
|||
// Undo the kernel_radius shift applied to i2GroupCoord: i2Coord is now the
// first pixel this invocation filters.
i2Coord.x += @value( kernel_radius ); |
|||
|
|||
// Invocations whose first pixel lies past g_f4OutputSize.x do no filtering.
if( i2Coord.x < int(g_f4OutputSize.x) ) |
|||
{ |
|||
ivec2 i2Center = i2Coord + ivec2( 0, gl_LocalInvocationID.y ); |
|||
ivec2 i2Inc = ivec2 ( 1, 0 ); |
|||
|
|||
// When downscaling, the coordinate along the filtered axis is halved.
@property( downscale ) |
|||
i2Center.x = int( uint( i2Center.x ) >> 1u ); |
|||
@end |
|||
|
|||
ComputeFilterKernel( iPixelOffset, iLineOffset, i2Center, i2Inc ); |
|||
} |
|||
@end @property( !horizontal_pass ) |
|||
// Vertical pass: same steps as above with x and y exchanged.
int iSampleOffset = int( gl_LocalInvocationID.x * @value( samples_per_thread ) ); |
|||
int iLineOffset = int( gl_LocalInvocationID.y ); |
|||
|
|||
ivec2 i2GroupCoord = ivec2( gl_WorkGroupID.x << 1u, ( gl_WorkGroupID.y << 7u ) - @value( kernel_radius )u ); |
|||
ivec2 i2Coord = ivec2( i2GroupCoord.x, i2GroupCoord.y + iSampleOffset ); |
|||
|
|||
@foreach( samples_per_thread, i ) |
|||
g_f3LDS[ iLineOffset ][ iSampleOffset + @i ] = sampleTex( i2Coord + ivec2( gl_LocalInvocationID.y, @i ) , vec2( 0.0f, 0.5f ) );@end |
|||
|
|||
if( gl_LocalInvocationID.x < @value( samples_per_threadgroup )u - 32u * @value( samples_per_thread )u ) |
|||
{ |
|||
g_f3LDS[ iLineOffset ][ @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x ] = |
|||
sampleTex( i2GroupCoord + ivec2( gl_LocalInvocationID.y, @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x ), vec2( 0.0f, 0.5f ) ); |
|||
} |
|||
|
|||
//memoryBarrierShared ensures our write is visible to everyone else (must be done BEFORE the barrier) |
|||
//barrier ensures every thread's execution reached here. |
|||
memoryBarrierShared(); |
|||
barrier(); |
|||
|
|||
int iPixelOffset = int( gl_LocalInvocationID.x << 2u ); //gl_LocalInvocationID.x * 4u |
|||
i2Coord = ivec2( i2GroupCoord.x, i2GroupCoord.y + iPixelOffset ); |
|||
i2Coord.y += @value( kernel_radius ); |
|||
|
|||
if( i2Coord.y < int(g_f4OutputSize.y) ) |
|||
{ |
|||
ivec2 i2Center = i2Coord + ivec2( gl_LocalInvocationID.y, 0 ); |
|||
ivec2 i2Inc = ivec2 ( 0, 1 ); |
|||
|
|||
@property( downscale ) |
|||
i2Center.y = int( uint( i2Center.y ) >> 1u ); |
|||
@end |
|||
|
|||
ComputeFilterKernel( iPixelOffset, iLineOffset, i2Center, i2Inc ); |
|||
} |
|||
@end |
|||
} |
@ -0,0 +1,49 @@ |
|||
#version ogre_glsl_ver_330 |
|||
|
|||
// Source texture; main() reads only its .x channel through texelFetch.
vulkan_layout( ogre_t0 ) uniform texture2D tex; |
|||
|
|||
// Interpolated input block. uv0 is declared but main() in this file addresses
// texels through gl_FragCoord instead.
vulkan_layout( location = 0 ) |
|||
in block |
|||
{ |
|||
vec2 uv0; |
|||
} inPs; |
|||
|
|||
// Single-channel output holding the filtered value.
vulkan_layout( location = 0 ) |
|||
out float fragColour; |
|||
|
|||
in vec4 gl_FragCoord; |
|||
|
|||
// Filter weights. main() applies weights[NUM_WEIGHTS - 1] to the centre texel
// and weights[0] to the outermost tap on each side.
vulkan( layout( ogre_P0 ) uniform Params { ) |
|||
uniform float weights[NUM_WEIGHTS]; |
|||
vulkan( }; ) |
|||
|
|||
// Log-space 1D filter over 2 * NUM_WEIGHTS - 1 texels centred on the current
// fragment, stepping by (HORIZONTAL_STEP, VERTICAL_STEP) per tap.
// The outermost tap on the negative side is the reference depth d0; the result
// is d0 + log( sum( wi * exp( K * (di - d0) ) ) ) / K.
void main()
{
	ivec2 texel = ivec2( gl_FragCoord.xy );

	// Reference tap: furthest texel on the negative side. Its exponential term
	// is exactly 1, so it contributes its weight alone.
	float refDepth = texelFetch( tex, texel - ivec2( HORIZONTAL_STEP * (NUM_WEIGHTS - 1),
													 VERTICAL_STEP * (NUM_WEIGHTS - 1) ), 0 ).x;
	float accum = weights[0];

	// Remaining negative-side taps, walking towards the centre.
	for( int tap = NUM_WEIGHTS - 2; tap > 0; --tap )
	{
		float tapDepth = texelFetch( tex, texel - ivec2( HORIZONTAL_STEP * tap,
														 VERTICAL_STEP * tap ), 0 ).x;
		accum += exp( K * (tapDepth - refDepth) ) * weights[NUM_WEIGHTS - tap - 1];
	}

	// Centre texel.
	float centreDepth = texelFetch( tex, texel, 0 ).x;
	accum += exp( K * (centreDepth - refDepth) ) * weights[NUM_WEIGHTS - 1];

	// Positive-side taps, walking away from the centre.
	for( int tap = 1; tap < NUM_WEIGHTS; ++tap )
	{
		float tapDepth = texelFetch( tex, texel + ivec2( HORIZONTAL_STEP * tap,
														 VERTICAL_STEP * tap ), 0 ).x;
		accum += exp( K * (tapDepth - refDepth) ) * weights[NUM_WEIGHTS - tap - 1];
	}

	fragColour = refDepth + log( accum ) / K;
}
@ -0,0 +1,9 @@ |
|||
#version ogre_glsl_ver_330 |
|||
|
|||
vulkan_layout( location = 0 )
out vec4 fragColour;

// Writes zero to every channel of the colour output.
void main()
{
	fragColour = vec4( 0.0, 0.0, 0.0, 0.0 );
}
@ -0,0 +1,31 @@ |
|||
#version ogre_glsl_ver_330 |
|||
|
|||
// main() writes gl_ViewportIndex; this extension is presumably what permits
// that from this shader stage -- confirm against the extension spec.
#extension GL_ARB_shader_viewport_layer_array : require |
|||
|
|||
// HLSL-style aliases so the body can use float2/float3/float4, float4x4 and mul().
#define float2 vec2 |
|||
#define float3 vec3 |
|||
#define float4 vec4 |
|||
|
|||
#define float4x4 mat4 |
|||
#define mul( x, y ) ((x) * (y)) |
|||
|
|||
// projectionMatrix transforms vertex.xy; rsDepthRange.x is written to
// gl_Position.z (rsDepthRange.y is not used by main() in this file).
vulkan( layout( ogre_P0 ) uniform Params { ) |
|||
uniform float4x4 projectionMatrix; |
|||
uniform float2 rsDepthRange; |
|||
vulkan( }; ) |
|||
|
|||
// Per-vertex input: xy is the position, z carries the viewport index.
vulkan_layout( OGRE_POSITION ) in vec4 vertex; |
|||
|
|||
vulkan_layout( location = 0 ) |
|||
out gl_PerVertex |
|||
{ |
|||
vec4 gl_Position; |
|||
}; |
|||
|
|||
// Projects vertex.xy with projectionMatrix, pins depth to rsDepthRange.x and
// w to 1, and routes the primitive to the viewport stored in vertex.z.
void main()
{
	float4 projected = mul( projectionMatrix, float4( vertex.xy, 0.0f, 1.0f ) );

	gl_Position = float4( projected.xy, rsDepthRange.x, 1.0f );
	gl_ViewportIndex = int( vertex.z );
}
@ -0,0 +1,40 @@ |
|||
//Based on GPUOpen's samples SeparableFilter11 |
|||
//https://github.com/GPUOpen-LibrariesAndSDKs/SeparableFilter11 |
|||
//For better understanding, read "Efficient Compute Shader Programming" from Bill Bilodeau |
|||
//http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/Efficient%20Compute%20Shader%20Programming.pps |
|||
|
|||
//TL;DR: |
|||
// * Each thread works on 4 pixels at a time (for VLIW hardware, i.e. Radeon HD 5000 & 6000 series). |
|||
// * 256 pixels per threadgroup. Each threadgroup works on 2 rows of 128 pixels each. |
|||
// That means 32x2 threads = 64. 64 threads x 4 pixels per thread = 256 |
|||
|
|||
// data_type: element type of the accumulators in the base compute shader.
@piece( data_type )vec3@end |
|||
// lds_data_type: return type of sampleTex, i.e. what gets stored in the LDS.
// It equals data_type here, so no decode step is needed for this variant.
@piece( lds_data_type )vec3@end |
|||
// lds_definition: shared storage, 2 rows of samples_per_threadgroup entries.
@piece( lds_definition ) |
|||
shared vec3 g_f3LDS[ 2 ] [ @value( samples_per_threadgroup ) ]; |
|||
@end |
|||
|
|||
@piece( extra_params ) |
|||
uniform float srcLodIdx; |
|||
@en |