Browse Source

Initial commit

master
Macoy Madson 3 years ago
commit
b314268d49
  1. 22
      .clang-format
  2. 6
      .gitmodules
  3. 25
      BuildDependencies_Debug.sh
  4. 16
      Build_Debug.sh
  5. 1
      Dependencies/ogre-next
  6. 1
      Dependencies/ogre-next-deps
  7. 47
      data/CommonMaterials/Any/PccDepthCompressor_ps.any
  8. 47
      data/CommonMaterials/Common/Any/PccDepthCompressor_ps.any
  9. 225
      data/CommonMaterials/Common/Copyback.material
  10. 65
      data/CommonMaterials/Common/DPM.material
  11. 134
      data/CommonMaterials/Common/DPSM.material
  12. 67
      data/CommonMaterials/Common/DepthUtils.material
  13. 117
      data/CommonMaterials/Common/EsmGaussianBlurLogFilter.material
  14. 101
      data/CommonMaterials/Common/EsmGaussianBlurLogFilter.material.json
  15. 18
      data/CommonMaterials/Common/GLSL/Copyback_1xFP32_ps.glsl
  16. 22
      data/CommonMaterials/Common/GLSL/Copyback_4xFP32_2DArray_ps.glsl
  17. 18
      data/CommonMaterials/Common/GLSL/Copyback_4xFP32_ps.glsl
  18. 28
      data/CommonMaterials/Common/GLSL/CubeToDpm_4xFP16_ps.glsl
  19. 37
      data/CommonMaterials/Common/GLSL/CubeToDpsm_ps.glsl
  20. 17
      data/CommonMaterials/Common/GLSL/DepthDownscaleMax_ps.glsl
  21. 22
      data/CommonMaterials/Common/GLSL/EsmGaussianBlurLogFilter_cs.glsl
  22. 263
      data/CommonMaterials/Common/GLSL/GaussianBlurBase_cs.glsl
  23. 285
      data/CommonMaterials/Common/GLSL/GaussianBlurLogFilterBase_cs.glsl
  24. 49
      data/CommonMaterials/Common/GLSL/GaussianBlurLogFilter_ps.glsl
  25. 9
      data/CommonMaterials/Common/GLSL/HiddenAreaMeshVr_ps.glsl
  26. 31
      data/CommonMaterials/Common/GLSL/HiddenAreaMeshVr_vs.glsl
  27. 40
      data/CommonMaterials/Common/GLSL/MipmapsGaussianBlur_cs.glsl
  28. 52
      data/CommonMaterials/Common/GLSL/PccDepthCompressor_ps.glsl
  29. 28
      data/CommonMaterials/Common/GLSL/QuadCameraDirNoUV_vs.glsl
  30. 28
      data/CommonMaterials/Common/GLSL/QuadCameraDir_vs.glsl
  31. 25
      data/CommonMaterials/Common/GLSL/Quad_vs.glsl
  32. 42
      data/CommonMaterials/Common/GLSL/RadialDensityMask_ps.glsl
  33. 28
      data/CommonMaterials/Common/GLSL/RadialDensityMask_vs.glsl
  34. 19
      data/CommonMaterials/Common/GLSL/Resolve_1xFP32_Subsample0_ps.glsl
  35. 20
      data/CommonMaterials/Common/GLSL/SkyCubemap_ps.glsl
  36. 37
      data/CommonMaterials/Common/GLSL/SkyEquirectangular_ps.glsl
  37. 35
      data/CommonMaterials/Common/GLSLES/CubeToDpsm_ps.glsles
  38. 25
      data/CommonMaterials/Common/GLSLES/QuadCameraDir_vs.glsles
  39. 22
      data/CommonMaterials/Common/GLSLES/Quad_vs.glsles
  40. 8
      data/CommonMaterials/Common/HLSL/Copyback_1xFP32_ps.hlsl
  41. 8
      data/CommonMaterials/Common/HLSL/Copyback_4xFP32_2DArray_ps.hlsl
  42. 8
      data/CommonMaterials/Common/HLSL/Copyback_4xFP32_ps.hlsl
  43. 25
      data/CommonMaterials/Common/HLSL/CubeToDpm_4xFP16_ps.hlsl
  44. 30
      data/CommonMaterials/Common/HLSL/CubeToDpsm_ps.hlsl
  45. 21
      data/CommonMaterials/Common/HLSL/DepthDownscaleMax_ps.hlsl
  46. 22
      data/CommonMaterials/Common/HLSL/EsmGaussianBlurLogFilter_cs.hlsl
  47. 242
      data/CommonMaterials/Common/HLSL/GaussianBlurBase_cs.hlsl
  48. 263
      data/CommonMaterials/Common/HLSL/GaussianBlurLogFilterBase_cs.hlsl
  49. 43
      data/CommonMaterials/Common/HLSL/GaussianBlurLogFilter_ps.hlsl
  50. 4
      data/CommonMaterials/Common/HLSL/HiddenAreaMeshVr_ps.hlsl
  51. 27
      data/CommonMaterials/Common/HLSL/HiddenAreaMeshVr_vs.hlsl
  52. 37
      data/CommonMaterials/Common/HLSL/MipmapsGaussianBlur_cs.hlsl
  53. 39
      data/CommonMaterials/Common/HLSL/PccDepthCompressor_ps.hlsl
  54. 31
      data/CommonMaterials/Common/HLSL/QuadCameraDirNoUV_vs.hlsl
  55. 31
      data/CommonMaterials/Common/HLSL/QuadCameraDir_vs.hlsl
  56. 25
      data/CommonMaterials/Common/HLSL/Quad_vs.hlsl
  57. 35
      data/CommonMaterials/Common/HLSL/RadialDensityMask_ps.hlsl
  58. 31
      data/CommonMaterials/Common/HLSL/RadialDensityMask_vs.hlsl
  59. 16
      data/CommonMaterials/Common/HLSL/Resolve_1xFP32_Subsample0_ps.hlsl
  60. 16
      data/CommonMaterials/Common/HLSL/SkyCubemap_ps.hlsl
  61. 25
      data/CommonMaterials/Common/HLSL/SkyEquirectangular_ps.hlsl
  62. 95
      data/CommonMaterials/Common/HiddenAreaMeshVr.material
  63. 17
      data/CommonMaterials/Common/Metal/Copyback_1xFP32_ps.metal
  64. 18
      data/CommonMaterials/Common/Metal/Copyback_4xFP32_2DArray_ps.metal
  65. 17
      data/CommonMaterials/Common/Metal/Copyback_4xFP32_ps.metal
  66. 28
      data/CommonMaterials/Common/Metal/CubeToDpm_4xFP16_ps.metal
  67. 37
      data/CommonMaterials/Common/Metal/CubeToDpsm_ps.metal
  68. 33
      data/CommonMaterials/Common/Metal/DepthDownscaleMax_ps.metal
  69. 25
      data/CommonMaterials/Common/Metal/EsmGaussianBlurLogFilter_cs.metal
  70. 269
      data/CommonMaterials/Common/Metal/GaussianBlurBase_cs.metal
  71. 284
      data/CommonMaterials/Common/Metal/GaussianBlurLogFilterBase_cs.metal
  72. 51
      data/CommonMaterials/Common/Metal/GaussianBlurLogFilter_ps.metal
  73. 7
      data/CommonMaterials/Common/Metal/HiddenAreaMeshVr_ps.metal
  74. 35
      data/CommonMaterials/Common/Metal/HiddenAreaMeshVr_vs.metal
  75. 42
      data/CommonMaterials/Common/Metal/MipmapsGaussianBlur_cs.metal
  76. 45
      data/CommonMaterials/Common/Metal/PccDepthCompressor_ps.metal
  77. 37
      data/CommonMaterials/Common/Metal/QuadCameraDirNoUV_vs.metal
  78. 32
      data/CommonMaterials/Common/Metal/QuadCameraDir_vs.metal
  79. 28
      data/CommonMaterials/Common/Metal/Quad_vs.metal
  80. 41
      data/CommonMaterials/Common/Metal/RadialDensityMask_ps.metal
  81. 38
      data/CommonMaterials/Common/Metal/RadialDensityMask_vs.metal
  82. 17
      data/CommonMaterials/Common/Metal/Resolve_1xFP32_Subsample0_ps.metal
  83. 18
      data/CommonMaterials/Common/Metal/SkyCubemap_ps.metal
  84. 27
      data/CommonMaterials/Common/Metal/SkyEquirectangular_ps.metal
  85. 87
      data/CommonMaterials/Common/Mipmaps.material.json
  86. 74
      data/CommonMaterials/Common/PccDepthCompressor.material
  87. 120
      data/CommonMaterials/Common/Quad.program
  88. 90
      data/CommonMaterials/Common/RadialDensityMask.material
  89. 137
      data/CommonMaterials/Common/Sky.material
  90. BIN
      data/CommonMaterials/Common/brtfLutDfg.dds
  91. BIN
      data/CommonMaterials/Common/ltcMatrix0.dds
  92. BIN
      data/CommonMaterials/Common/ltcMatrix1.dds
  93. 225
      data/CommonMaterials/Copyback.material
  94. 65
      data/CommonMaterials/DPM.material
  95. 134
      data/CommonMaterials/DPSM.material
  96. 67
      data/CommonMaterials/DepthUtils.material
  97. 117
      data/CommonMaterials/EsmGaussianBlurLogFilter.material
  98. 101
      data/CommonMaterials/EsmGaussianBlurLogFilter.material.json
  99. 18
      data/CommonMaterials/GLSL/Copyback_1xFP32_ps.glsl
  100. 22
      data/CommonMaterials/GLSL/Copyback_4xFP32_2DArray_ps.glsl

22
.clang-format

@ -0,0 +1,22 @@
# http://releases.llvm.org/6.0.0/tools/clang/docs/ClangFormatStyleOptions.html
BasedOnStyle: Google
AccessModifierOffset: -4
AllowShortBlocksOnASingleLine: false
AllowShortFunctionsOnASingleLine: None
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
BreakBeforeBraces: Allman
BraceWrapping:
AfterNamespace: false
BreakBeforeTernaryOperators: false
ColumnLimit: 100
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
IndentWidth: 4
Standard: Cpp11
TabWidth: 4
UseTab: ForIndentation
DerivePointerAlignment: false
PointerAlignment: Left
NamespaceIndentation: None
IndentCaseLabels: true

6
.gitmodules

@ -0,0 +1,6 @@
[submodule "Dependencies/ogre-next"]
path = Dependencies/ogre-next
url = https://github.com/OGRECave/ogre-next
[submodule "Dependencies/ogre-next-deps"]
path = Dependencies/ogre-next-deps
url = https://github.com/OGRECave/ogre-next-deps

25
BuildDependencies_Debug.sh

@ -0,0 +1,25 @@
#!/bin/sh
# Builds the Ogre dependencies and then Ogre itself (both Debug and Release) using
# CMake + Ninja. Run it from the repository root: every path below is relative to
# the Dependencies/ directory entered on the next line.
# Any failing step aborts the script with that step's exit status.
cd Dependencies/ || exit $?

# See the official script at
# https://raw.githubusercontent.com/OGRECave/ogre-next/master/Scripts/BuildScripts/output/build_ogre_linux_c%2B%2Blatest.sh

echo "Building Ogre dependencies..."
cd ogre-next-deps && mkdir -p build && cd build && cmake -G Ninja .. || exit $?
ninja || exit $?
ninja install || exit $?

echo "Building Ogre..."
cd ../../ogre-next || exit $?
# Link the freshly built dependencies into the Ogre tree, but only once.
# The link points at a directory, so "-f" (regular file) would never match and the
# link would be re-created on every run; "-e" catches an existing target and "-L"
# additionally catches a dangling link left over from a cleaned build.
if test ! -e Dependencies && test ! -L Dependencies; then
	ln -s ../ogre-next-deps/build/ogredeps Dependencies || exit $?
fi

mkdir -p build/Debug || exit $?
mkdir -p build/Release || exit $?

cd build/Debug || exit $?
echo "--- Building Ogre (Debug) ---"
cmake -D OGRE_USE_BOOST=0 -D OGRE_CONFIG_THREAD_PROVIDER=0 -D OGRE_CONFIG_THREADS=0 -D OGRE_BUILD_COMPONENT_SCENE_FORMAT=1 -D OGRE_BUILD_SAMPLES2=1 -D OGRE_BUILD_TESTS=1 -D CMAKE_BUILD_TYPE="Debug" -G Ninja ../.. || exit $?
ninja || exit $?

cd ../Release || exit $?
echo "--- Building Ogre (Release) ---"
cmake -D OGRE_USE_BOOST=0 -D OGRE_CONFIG_THREAD_PROVIDER=0 -D OGRE_CONFIG_THREADS=0 -D OGRE_BUILD_COMPONENT_SCENE_FORMAT=1 -D OGRE_BUILD_SAMPLES2=1 -D OGRE_BUILD_TESTS=1 -D CMAKE_BUILD_TYPE="Release" -G Ninja ../.. || exit $?
ninja || exit $?

16
Build_Debug.sh

@ -0,0 +1,16 @@
#!/bin/sh
# Compiles main.cpp and links it against the Debug build of Ogre found under
# Dependencies/ogre-next/build/Debug, producing the ogreApp executable.
# Note that this will rebuild and re-link every time. You should use a build system to save time!

# Compile. Stop here on failure so the link step below never picks up a stale main.o
# left behind by an earlier successful build.
clang++ -c main.cpp -g -IDependencies/ogre-next/OgreMain/include \
	-IDependencies/ogre-next/Components/Hlms/Common/include \
	-IDependencies/ogre-next/Components/Hlms/Pbs/include \
	-IDependencies/ogre-next/Components/Hlms/Unlit/include \
	-IDependencies/ogre-next/build/Debug/include \
	-IDependencies/ogre-next/Components/Overlay/include || exit $?

# Link. The rpath lets the executable locate the Ogre shared libraries both next to
# itself and in the Debug build output directory.
clang++ -o ogreApp main.o -g -LDependencies/ogre-next/build/Debug/lib \
	-lOgreHlmsPbs_d -lOgreHlmsUnlit_d -lOgreMain_d -lOgreOverlay_d \
	-Wl,-rpath,.:Dependencies/ogre-next/build/Debug/lib || exit $?

1
Dependencies/ogre-next

@ -0,0 +1 @@
Subproject commit 3dceaab8f45e71b5f9ee68f7666b522c558396ae

1
Dependencies/ogre-next-deps

@ -0,0 +1 @@
Subproject commit 43363bea749e33b78e5394c92d1fe2ebd2edbceb

47
data/CommonMaterials/Any/PccDepthCompressor_ps.any

@ -0,0 +1,47 @@
//#include "SyntaxHighlightingMisc.h"
#ifdef HEADER
/**
	Intersects a ray with an origin-centred box whose half extents are probeShapeHalfSize.
	The ray starts at posLS and travels along reflDirLS. The returned value t satisfies
		posLS + reflDirLS * t = intersectionPos;
*/
INLINE float findIntersectionDistance( float3 reflDirLS, float3 posLS, float3 probeShapeHalfSize )
{
	//Per-axis ray parameter at the negative and at the positive face planes
	float3 rcpDirLS = float3( 1.0, 1.0, 1.0 ) / reflDirLS;
	float3 tAtMinPlane = ( -probeShapeHalfSize - posLS ) * rcpDirLS;
	float3 tAtMaxPlane = ( probeShapeHalfSize - posLS ) * rcpDirLS;

	//Keep the larger parameter on each axis (negative values are of no interest)
	float3 tFar = max( tAtMaxPlane.xyz, tAtMinPlane.xyz );

	//The nearest of the three per-axis candidates is the answer
	return min( min( tFar.x, tFar.y ), tFar.z );
}
#else
float fDepth = OGRE_Sample( depthTexture, pointSampler, inPs.uv0 ).x;
float linearDepth = p_projectionParams.y / (fDepth - p_projectionParams.x);
float3 viewSpacePosition = inPs.cameraDir * linearDepth;
float fDist = length( viewSpacePosition.xyz );
float3 probeToPosDir = viewSpacePosition / fDist;
probeToPosDir = mul( p_viewSpaceToProbeLocalSpace, probeToPosDir );
float fApproxDist = findIntersectionDistance( probeToPosDir, p_cameraPosLS, p_probeShapeHalfSize );
//We can't store fDist directly because we have limited precision (often 8 bits)
//Thus we store it in terms of
// fApproxDist * alpha = fDist;
//During render we'll know fApproxDist and alpha, but want to know fDist
//We also know alpha >= 0
//For precision issues and because it's good enough, we force alpha <= 2.0
float alpha = fDist / fApproxDist;
alpha *= 0.5;
alpha = min( alpha, 1.0 );
#endif

47
data/CommonMaterials/Common/Any/PccDepthCompressor_ps.any

@ -0,0 +1,47 @@
//#include "SyntaxHighlightingMisc.h"
#ifdef HEADER
/**
	Intersects a ray with an origin-centred box whose half extents are probeShapeHalfSize.
	The ray starts at posLS and travels along reflDirLS. The returned value t satisfies
		posLS + reflDirLS * t = intersectionPos;
*/
INLINE float findIntersectionDistance( float3 reflDirLS, float3 posLS, float3 probeShapeHalfSize )
{
	//Per-axis ray parameter at the negative and at the positive face planes
	float3 rcpDirLS = float3( 1.0, 1.0, 1.0 ) / reflDirLS;
	float3 tAtMinPlane = ( -probeShapeHalfSize - posLS ) * rcpDirLS;
	float3 tAtMaxPlane = ( probeShapeHalfSize - posLS ) * rcpDirLS;

	//Keep the larger parameter on each axis (negative values are of no interest)
	float3 tFar = max( tAtMaxPlane.xyz, tAtMinPlane.xyz );

	//The nearest of the three per-axis candidates is the answer
	return min( min( tFar.x, tFar.y ), tFar.z );
}
#else
float fDepth = OGRE_Sample( depthTexture, pointSampler, inPs.uv0 ).x;
float linearDepth = p_projectionParams.y / (fDepth - p_projectionParams.x);
float3 viewSpacePosition = inPs.cameraDir * linearDepth;
float fDist = length( viewSpacePosition.xyz );
float3 probeToPosDir = viewSpacePosition / fDist;
probeToPosDir = mul( p_viewSpaceToProbeLocalSpace, probeToPosDir );
float fApproxDist = findIntersectionDistance( probeToPosDir, p_cameraPosLS, p_probeShapeHalfSize );
//We can't store fDist directly because we have limited precision (often 8 bits)
//Thus we store it in terms of
// fApproxDist * alpha = fDist;
//During render we'll know fApproxDist and alpha, but want to know fDist
//We also know alpha >= 0
//For precision issues and because it's good enough, we force alpha <= 2.0
float alpha = fDist / fApproxDist;
alpha *= 0.5;
alpha = min( alpha, 1.0 );
#endif

225
data/CommonMaterials/Common/Copyback.material

@ -0,0 +1,225 @@
fragment_program Ogre/Copy/4xFP32_ps_HLSL hlsl
{
source Copyback_4xFP32_ps.hlsl
entry_point main
target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3
}
fragment_program Ogre/Copy/4xFP32_ps_GLSL glsl
{
source Copyback_4xFP32_ps.glsl
default_params { param_named tex int 0 }
}
fragment_program Ogre/Copy/4xFP32_ps_VK glslvk
{
source Copyback_4xFP32_ps.glsl
}
fragment_program Ogre/Copy/4xFP32_ps_Metal metal
{
source Copyback_4xFP32_ps.metal
shader_reflection_pair_hint Ogre/Compositor/Quad_vs
}
fragment_program Ogre/Copy/4xFP32_ps unified
{
delegate Ogre/Copy/4xFP32_ps_GLSL
delegate Ogre/Copy/4xFP32_ps_VK
delegate Ogre/Copy/4xFP32_ps_HLSL
delegate Ogre/Copy/4xFP32_ps_Metal
}
material Ogre/Copy/4xFP32
{
technique
{
pass
{
depth_check off
depth_write off
cull_hardware none
vertex_program_ref Ogre/Compositor/Quad_vs
{
}
fragment_program_ref Ogre/Copy/4xFP32_ps
{
}
texture_unit
{
filtering none
tex_address_mode clamp
}
}
}
}
fragment_program Ogre/Copy/4xFP32_2DArray_ps_HLSL hlsl
{
source Copyback_4xFP32_2DArray_ps.hlsl
entry_point main
target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3
}
fragment_program Ogre/Copy/4xFP32_2DArray_ps_GLSL glsl
{
source Copyback_4xFP32_2DArray_ps.glsl
default_params { param_named tex int 0 }
}
fragment_program Ogre/Copy/4xFP32_2DArray_ps_VK glslvk
{
source Copyback_4xFP32_2DArray_ps.glsl
}
fragment_program Ogre/Copy/4xFP32_2DArray_ps_Metal metal
{
source Copyback_4xFP32_2DArray_ps.metal
shader_reflection_pair_hint Ogre/Compositor/Quad_vs
}
fragment_program Ogre/Copy/4xFP32_2DArray_ps unified
{
delegate Ogre/Copy/4xFP32_2DArray_ps_GLSL
delegate Ogre/Copy/4xFP32_2DArray_ps_VK
delegate Ogre/Copy/4xFP32_2DArray_ps_HLSL
delegate Ogre/Copy/4xFP32_2DArray_ps_Metal
}
material Ogre/Copy/4xFP32_2DArray
{
technique
{
pass
{
depth_check off
depth_write off
cull_hardware none
vertex_program_ref Ogre/Compositor/Quad_vs
{
}
fragment_program_ref Ogre/Copy/4xFP32_2DArray_ps
{
param_named sliceIdx float 0
}
texture_unit
{
filtering none
tex_address_mode clamp
}
}
}
}
fragment_program Ogre/Copy/1xFP32_ps_HLSL hlsl
{
source Copyback_1xFP32_ps.hlsl
entry_point main
target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3
}
fragment_program Ogre/Copy/1xFP32_ps_GLSL glsl
{
source Copyback_1xFP32_ps.glsl
default_params { param_named tex int 0 }
}
fragment_program Ogre/Copy/1xFP32_ps_VK glslvk
{
source Copyback_1xFP32_ps.glsl
}
fragment_program Ogre/Copy/1xFP32_ps_Metal metal
{
source Copyback_1xFP32_ps.metal
shader_reflection_pair_hint Ogre/Compositor/Quad_vs
}
fragment_program Ogre/Copy/1xFP32_ps unified
{
delegate Ogre/Copy/1xFP32_ps_GLSL
delegate Ogre/Copy/1xFP32_ps_VK
delegate Ogre/Copy/1xFP32_ps_HLSL
delegate Ogre/Copy/1xFP32_ps_Metal
}
material Ogre/Copy/1xFP32 : Ogre/Copy/4xFP32
{
technique
{
pass
{
fragment_program_ref Ogre/Copy/1xFP32_ps
{
}
}
}
}
fragment_program Ogre/Resolve/1xFP32_Subsample0_ps_GLSL glsl
{
source Resolve_1xFP32_Subsample0_ps.glsl
default_params { param_named tex int 0 }
}
fragment_program Ogre/Resolve/1xFP32_Subsample0_ps_VK glslvk
{
source Resolve_1xFP32_Subsample0_ps.glsl
}
fragment_program Ogre/Resolve/1xFP32_Subsample0_ps_HLSL hlsl
{
source Resolve_1xFP32_Subsample0_ps.hlsl
entry_point main
target ps_5_0 ps_4_1
}
fragment_program Ogre/Resolve/1xFP32_Subsample0_ps_Metal metal
{
source Resolve_1xFP32_Subsample0_ps.metal
shader_reflection_pair_hint Ogre/Compositor/Quad_vs
}
fragment_program Ogre/Resolve/1xFP32_Subsample0_ps unified
{
delegate Ogre/Resolve/1xFP32_Subsample0_ps_GLSL
delegate Ogre/Resolve/1xFP32_Subsample0_ps_VK
delegate Ogre/Resolve/1xFP32_Subsample0_ps_HLSL
delegate Ogre/Resolve/1xFP32_Subsample0_ps_Metal
}
material Ogre/Resolve/1xFP32_Subsample0
{
technique
{
pass
{
depth_check off
depth_write off
cull_hardware none
vertex_program_ref Ogre/Compositor/Quad_vs
{
}
fragment_program_ref Ogre/Resolve/1xFP32_Subsample0_ps
{
}
texture_unit
{
filtering none
tex_address_mode clamp
}
}
}
}

65
data/CommonMaterials/Common/DPM.material

@ -0,0 +1,65 @@
//DPM stands for Dual Paraboloid Mapping.
fragment_program Ogre/DPM/CubeToDpm_4xFP16_ps_GLSL glsl
{
source CubeToDpm_4xFP16_ps.glsl
default_params
{
param_named cubeTexture int 0
}
}
fragment_program Ogre/DPM/CubeToDpm_4xFP16_ps_VK glslvk
{
source CubeToDpm_4xFP16_ps.glsl
}
fragment_program Ogre/DPM/CubeToDpm_4xFP16_ps_HLSL hlsl
{
source CubeToDpm_4xFP16_ps.hlsl
entry_point main
target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3
}
fragment_program Ogre/DPM/CubeToDpm_4xFP16_ps_Metal metal
{
source CubeToDpm_4xFP16_ps.metal
shader_reflection_pair_hint Ogre/Compositor/Quad_vs
}
fragment_program Ogre/DPM/CubeToDpm_4xFP16_ps unified
{
delegate Ogre/DPM/CubeToDpm_4xFP16_ps_GLSL
delegate Ogre/DPM/CubeToDpm_4xFP16_ps_VK
delegate Ogre/DPM/CubeToDpm_4xFP16_ps_HLSL
delegate Ogre/DPM/CubeToDpm_4xFP16_ps_Metal
}
// Converts a cubemap to DPM in the pixel shader.
material Ogre/DPM/CubeToDpm
{
technique
{
pass
{
depth_check off
depth_func always_pass
cull_hardware none
vertex_program_ref Ogre/Compositor/Quad_vs
{
}
fragment_program_ref Ogre/DPM/CubeToDpm_4xFP16_ps
{
}
texture_unit depthTexture
{
filtering bilinear
tex_address_mode clamp
}
}
}
}

134
data/CommonMaterials/Common/DPSM.material

@ -0,0 +1,134 @@
//DPSM stands for Dual Paraboloid Shadow Mapping.
fragment_program Ogre/DPSM/CubeToDpsm_ps_GLSL glsl
{
source CubeToDpsm_ps.glsl
default_params
{
param_named depthTexture int 0
}
}
fragment_program Ogre/DPSM/CubeToDpsm_Colour_ps_GLSL glsl : Ogre/DPSM/CubeToDpsm_ps_GLSL
{
preprocessor_defines OUTPUT_TO_COLOUR=1
}
fragment_program Ogre/DPSM/CubeToDpsm_ps_VK glslvk
{
source CubeToDpsm_ps.glsl
}
fragment_program Ogre/DPSM/CubeToDpsm_Colour_ps_VK glslvk : Ogre/DPSM/CubeToDpsm_ps_VK
{
preprocessor_defines OUTPUT_TO_COLOUR=1
}
fragment_program Ogre/DPSM/CubeToDpsm_ps_GLSLES glsles
{
source CubeToDpsm_ps.glsles
default_params
{
param_named depthTexture int 0
}
}
fragment_program Ogre/DPSM/CubeToDpsm_Colour_ps_GLSLES glsles : Ogre/DPSM/CubeToDpsm_ps_GLSLES
{
preprocessor_defines OUTPUT_TO_COLOUR=1
}
fragment_program Ogre/DPSM/CubeToDpsm_ps_HLSL hlsl
{
source CubeToDpsm_ps.hlsl
entry_point main
target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3
}
fragment_program Ogre/DPSM/CubeToDpsm_Colour_ps_HLSL hlsl : Ogre/DPSM/CubeToDpsm_ps_HLSL
{
preprocessor_defines OUTPUT_TO_COLOUR=1
}
fragment_program Ogre/DPSM/CubeToDpsm_ps_Metal metal
{
source CubeToDpsm_ps.metal
shader_reflection_pair_hint Ogre/Compositor/Quad_vs
}
fragment_program Ogre/DPSM/CubeToDpsm_Colour_ps_Metal metal : Ogre/DPSM/CubeToDpsm_ps_Metal
{
preprocessor_defines OUTPUT_TO_COLOUR=1
}
fragment_program Ogre/DPSM/CubeToDpsm_ps unified
{
delegate Ogre/DPSM/CubeToDpsm_ps_GLSL
delegate Ogre/DPSM/CubeToDpsm_ps_GLSLES
delegate Ogre/DPSM/CubeToDpsm_ps_VK
delegate Ogre/DPSM/CubeToDpsm_ps_HLSL
delegate Ogre/DPSM/CubeToDpsm_ps_Metal
}
fragment_program Ogre/DPSM/CubeToDpsm_Colour_ps unified
{
delegate Ogre/DPSM/CubeToDpsm_Colour_ps_GLSL
delegate Ogre/DPSM/CubeToDpsm_Colour_ps_GLSLES
delegate Ogre/DPSM/CubeToDpsm_Colour_ps_VK
delegate Ogre/DPSM/CubeToDpsm_Colour_ps_HLSL
delegate Ogre/DPSM/CubeToDpsm_Colour_ps_Metal
}
// Converts a cubemap to DPSM in the pixel shader.
material Ogre/DPSM/CubeToDpsm
{
technique
{
pass
{
//Depth writes must be on, since we write directly to the depth buffer.
depth_check on
depth_write on
depth_func always_pass
cull_hardware none
vertex_program_ref Ogre/Compositor/Quad_vs
{
}
fragment_program_ref Ogre/DPSM/CubeToDpsm_ps
{
}
texture_unit depthTexture
{
filtering none
tex_address_mode clamp
}
}
}
}
material Ogre/DPSM/CubeToDpsmColour
{
technique
{
pass
{
depth_check off
depth_write off
cull_hardware none
vertex_program_ref Ogre/Compositor/Quad_vs
{
}
fragment_program_ref Ogre/DPSM/CubeToDpsm_Colour_ps
{
}
texture_unit depthTexture
{
filtering none
tex_address_mode clamp
}
}
}
}

67
data/CommonMaterials/Common/DepthUtils.material

@ -0,0 +1,67 @@
fragment_program Ogre/Depth/DownscaleMax_ps_GLSL glsl
{
source DepthDownscaleMax_ps.glsl
default_params
{
param_named depthTexture int 0
}
}
fragment_program Ogre/Depth/DownscaleMax_ps_VK glslvk
{
source DepthDownscaleMax_ps.glsl
}
fragment_program Ogre/Depth/DownscaleMax_ps_HLSL hlsl
{
source DepthDownscaleMax_ps.hlsl
entry_point main
target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3
}
fragment_program Ogre/Depth/DownscaleMax_ps_Metal metal
{
source DepthDownscaleMax_ps.metal
shader_reflection_pair_hint Ogre/Compositor/Quad_vs
}
fragment_program Ogre/Depth/DownscaleMax_ps unified
{
delegate Ogre/Depth/DownscaleMax_ps_GLSL
delegate Ogre/Depth/DownscaleMax_ps_VK
delegate Ogre/Depth/DownscaleMax_ps_HLSL
delegate Ogre/Depth/DownscaleMax_ps_Metal
}
// Downscales resolution of input depth texture by half (w/2 x h/2)
// using a max filter (max depth of all 4 neighbours)
material Ogre/Depth/DownscaleMax
{
technique
{
pass
{
depth_check on
depth_write on
depth_func always_pass
cull_hardware none
vertex_program_ref Ogre/Compositor/Quad_vs
{
}
fragment_program_ref Ogre/Depth/DownscaleMax_ps
{
}
texture_unit depthTexture
{
filtering none
tex_address_mode clamp
}
}
}
}

117
data/CommonMaterials/Common/EsmGaussianBlurLogFilter.material

@ -0,0 +1,117 @@
// GLSL pixel shader for the pass that sets HORIZONTAL_STEP=1 / VERTICAL_STEP=0.
// The "V" program that inherits from this one swaps those two defines.
fragment_program ESM/GaussianLogFilterH_ps_GLSL glsl
{
	source GaussianBlurLogFilter_ps.glsl
	// K is listed once, matching the HLSL and Metal variants of this program.
	preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=1,VERTICAL_STEP=0
	default_params
	{
		param_named tex int 0
	}
}
fragment_program ESM/GaussianLogFilterV_ps_GLSL glsl : ESM/GaussianLogFilterH_ps_GLSL
{
preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=0,VERTICAL_STEP=1
}
// Vulkan (glslvk) pixel shader for the pass that sets HORIZONTAL_STEP=1 / VERTICAL_STEP=0.
// It is built from the same GLSL source file as the plain GLSL variant.
fragment_program ESM/GaussianLogFilterH_ps_VK glslvk
{
	source GaussianBlurLogFilter_ps.glsl
	// K is listed once, matching the HLSL and Metal variants of this program.
	preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=1,VERTICAL_STEP=0
}
fragment_program ESM/GaussianLogFilterV_ps_VK glslvk : ESM/GaussianLogFilterH_ps_VK
{
preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=0,VERTICAL_STEP=1
}
fragment_program ESM/GaussianLogFilterH_ps_HLSL hlsl
{
source GaussianBlurLogFilter_ps.hlsl
entry_point main
target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3
preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=1,VERTICAL_STEP=0
}
fragment_program ESM/GaussianLogFilterV_ps_HLSL hlsl : ESM/GaussianLogFilterH_ps_HLSL
{
preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=0,VERTICAL_STEP=1
}
fragment_program ESM/GaussianLogFilterH_ps_Metal metal
{
source GaussianBlurLogFilter_ps.metal
shader_reflection_pair_hint Ogre/Compositor/Quad_vs
preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=1,VERTICAL_STEP=0
}
fragment_program ESM/GaussianLogFilterV_ps_Metal metal : ESM/GaussianLogFilterH_ps_Metal
{
preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=0,VERTICAL_STEP=1
}
fragment_program ESM/GaussianLogFilterH_ps unified
{
delegate ESM/GaussianLogFilterH_ps_GLSL
delegate ESM/GaussianLogFilterH_ps_VK
delegate ESM/GaussianLogFilterH_ps_HLSL
delegate ESM/GaussianLogFilterH_ps_Metal
default_params
{
param_named weights float9 0.013960189 0.022308320 0.033488754 0.047226712 0.062565230 0.077863686 0.091031872 0.099978946 0.10315263
}
}
fragment_program ESM/GaussianLogFilterV_ps unified
{
delegate ESM/GaussianLogFilterV_ps_GLSL
delegate ESM/GaussianLogFilterV_ps_VK
delegate ESM/GaussianLogFilterV_ps_HLSL
delegate ESM/GaussianLogFilterV_ps_Metal
default_params
{
param_named weights float9 0.013960189 0.022308320 0.033488754 0.047226712 0.062565230 0.077863686 0.091031872 0.099978946 0.10315263
}
}
material ESM/GaussianLogFilterH
{
technique
{
pass
{
depth_check off
depth_write off
cull_hardware none
vertex_program_ref Ogre/Compositor/Quad_vs
{
}
fragment_program_ref ESM/GaussianLogFilterH_ps
{
}
texture_unit tex
{
filtering none
tex_address_mode clamp
}
}
}
}
material ESM/GaussianLogFilterV : ESM/GaussianLogFilterH
{
technique
{
pass
{
fragment_program_ref ESM/GaussianLogFilterV_ps
{
}
}
}
}

101
data/CommonMaterials/Common/EsmGaussianBlurLogFilter.material.json

@ -0,0 +1,101 @@
{
"samplers" :
{
"PointClamp" :
{
"min" : "point",
"mag" : "point",
"mip" : "point",
"u" : "clamp",
"v" : "clamp",
"w" : "clamp",
"miplodbias" : 0,
"max_anisotropic" : 1,
"compare_function" : "disabled",
"border" : [1, 1, 1, 1],
"min_lod" : -3.40282347E+38,
"max_lod" : 3.40282347E+38
}
},
"compute" :
{
"ESM/GaussianLogFilterH" :
{
"threads_per_group" : [32, 2, 1],
"thread_groups" : [8, 512, 1],
"source" : "GaussianBlurLogFilterBase_cs",
"pieces" : ["EsmGaussianBlurLogFilter_cs"],
"inform_shader_of_texture_data_change" : true,
"uav_units" : 1,
"textures" :
[
{
"sampler" : "PointClamp"
}
],
"params" :
[
["g_f4OutputSize", "packed_texture_size", 0],
["c_weights[0]", [0.013960189, 0.022308320, 0.033488754, 0.047226712,
0.062565230, 0.077863686, 0.091031872, 0.099978946, 0.10315263]]
],
"params_glsl" :
[
["inputImage", [0], "int"],
["outputImage", [0], "int"]
],
"properties" :
{
"horizontal_pass" : 1,
"kernel_radius" : 8,
"K" : 80
}
},
"ESM/GaussianLogFilterV" :
{
"threads_per_group" : [32, 2, 1],
"thread_groups" : [512, 8, 1],
"source" : "GaussianBlurLogFilterBase_cs",
"pieces" : ["EsmGaussianBlurLogFilter_cs"],
"inform_shader_of_texture_data_change" : true,
"uav_units" : 1,
"textures" :
[
{
"sampler" : "PointClamp"
}
],
"params" :
[
["g_f4OutputSize", "packed_texture_size", 0],
["c_weights[0]", [0.013960189, 0.022308320, 0.033488754, 0.047226712,
0.062565230, 0.077863686, 0.091031872, 0.099978946, 0.10315263]]
],
"params_glsl" :
[
["inputImage", [0], "int"],
["outputImage", [0], "int"]
],
"properties" :
{
"horizontal_pass" : 0,
"kernel_radius" : 8,
"K" : 80
}
}
}
}

18
data/CommonMaterials/Common/GLSL/Copyback_1xFP32_ps.glsl

@ -0,0 +1,18 @@
#version ogre_glsl_ver_330
vulkan_layout( ogre_t0 ) uniform texture2D tex;
vulkan( layout( ogre_s0 ) uniform sampler texSampler );
vulkan_layout( location = 0 )
in block
{
vec2 uv0;
} inPs;
vulkan_layout( location = 0 )
out float fragColour;
void main()
{
fragColour = texture( vkSampler2D( tex, texSampler ), inPs.uv0 ).x;
}

22
data/CommonMaterials/Common/GLSL/Copyback_4xFP32_2DArray_ps.glsl

@ -0,0 +1,22 @@
#version ogre_glsl_ver_330
vulkan_layout( ogre_t0 ) uniform texture2DArray tex;
vulkan( layout( ogre_s0 ) uniform sampler texSampler );
vulkan( layout( ogre_P0 ) uniform Params { )
uniform float sliceIdx;
vulkan( }; )
vulkan_layout( location = 0 )
in block
{
vec2 uv0;
} inPs;
vulkan_layout( location = 0 )
out vec4 fragColour;
void main()
{
fragColour = texture( vkSampler2DArray( tex, texSampler ), vec3( inPs.uv0, sliceIdx ) );
}

18
data/CommonMaterials/Common/GLSL/Copyback_4xFP32_ps.glsl

@ -0,0 +1,18 @@
#version ogre_glsl_ver_330
vulkan_layout( ogre_t0 ) uniform texture2D tex;
vulkan( layout( ogre_s0 ) uniform sampler texSampler );
vulkan_layout( location = 0 )
in block
{
vec2 uv0;
} inPs;
vulkan_layout( location = 0 )
out vec4 fragColour;
void main()
{
fragColour = texture( vkSampler2D( tex, texSampler ), inPs.uv0 );
}

28
data/CommonMaterials/Common/GLSL/CubeToDpm_4xFP16_ps.glsl

@ -0,0 +1,28 @@
#version ogre_glsl_ver_330
vulkan( layout( ogre_P0 ) uniform Params { )
uniform float lodLevel;
vulkan( }; )
vulkan_layout( ogre_t0 ) uniform textureCube cubeTexture;
vulkan( layout( ogre_s0 ) uniform sampler cubeSampler );
vulkan_layout( location = 0 )
in block
{
vec2 uv0;
} inPs;
vulkan_layout( location = 0 )
out vec4 fragColour;
void main()
{
vec3 cubeDir;
cubeDir.x = mod( inPs.uv0.x, 0.5 ) * 4.0 - 1.0;
cubeDir.y = inPs.uv0.y * 2.0 - 1.0;
cubeDir.z = 0.5 - 0.5 * (cubeDir.x * cubeDir.x + cubeDir.y * cubeDir.y);
cubeDir.z = inPs.uv0.x < 0.5 ? cubeDir.z : -cubeDir.z;
fragColour.xyzw = textureLod( vkSamplerCube( cubeTexture, cubeSampler ), cubeDir.xyz, lodLevel ).xyzw;
}

37
data/CommonMaterials/Common/GLSL/CubeToDpsm_ps.glsl

@ -0,0 +1,37 @@
#version ogre_glsl_ver_330
vulkan_layout( ogre_t0 ) uniform textureCube depthTexture;
vulkan( layout( ogre_s0 ) uniform sampler cubeSampler );
vulkan_layout( location = 0 )
in block
{
vec2 uv0;
} inPs;
in vec4 gl_FragCoord;
//out float gl_FragDepth;
#ifdef OUTPUT_TO_COLOUR
vulkan_layout( location = 0 )
out float fragColour;
#endif
void main()
{
vec3 cubeDir;
cubeDir.x = mod( inPs.uv0.x, 0.5 ) * 4.0 - 1.0;
cubeDir.y = inPs.uv0.y * 2.0 - 1.0;
cubeDir.z = 0.5 - 0.5 * (cubeDir.x * cubeDir.x + cubeDir.y * cubeDir.y);
cubeDir.z = inPs.uv0.x < 0.5 ? cubeDir.z : -cubeDir.z;
float depthValue = textureLod( vkSamplerCube( depthTexture, cubeSampler ), cubeDir.xyz, 0 ).x;
#ifdef OUTPUT_TO_COLOUR
fragColour = depthValue;
#else
gl_FragDepth = depthValue;
#endif
}

17
data/CommonMaterials/Common/GLSL/DepthDownscaleMax_ps.glsl

@ -0,0 +1,17 @@
#version ogre_glsl_ver_330
vulkan_layout( ogre_t0 ) uniform texture2D depthTexture;
in vec4 gl_FragCoord;
//out float gl_FragDepth;
void main()
{
	//First texel of the 2x2 source footprint that maps onto this output fragment
	ivec2 baseTexel = ivec2( gl_FragCoord.xy * 2.0 );

	//Max of the two texels at x offset 0, then of the two at x offset 1
	float fMaxColumn0 = max( texelFetch( depthTexture, baseTexel, 0 ).x,
							 texelFetch( depthTexture, baseTexel + ivec2( 0, 1 ), 0 ).x );
	float fMaxColumn1 = max( texelFetch( depthTexture, baseTexel + ivec2( 1, 0 ), 0 ).x,
							 texelFetch( depthTexture, baseTexel + ivec2( 1, 1 ), 0 ).x );

	//Write the largest of the four depths
	gl_FragDepth = max( fMaxColumn0, fMaxColumn1 );
}

22
data/CommonMaterials/Common/GLSL/EsmGaussianBlurLogFilter_cs.glsl

@ -0,0 +1,22 @@
//Based on GPUOpen's samples SeparableFilter11
//https://github.com/GPUOpen-LibrariesAndSDKs/SeparableFilter11
//For better understanding, read "Efficient Compute Shader Programming" from Bill Bilodeau
//http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/Efficient%20Compute%20Shader%20Programming.pps
//TL;DR:
// * Each thread works on 4 pixels at a time (for VLIW hardware, i.e. Radeon HD 5000 & 6000 series).
// * 256 pixels per threadgroup. Each threadgroup works on 2 rows of 128 pixels each.
// That means 32x2 threads = 64. 64 threads x 4 pixels per thread = 256
@piece( data_type )float@end
@piece( lds_data_type )float@end
@piece( lds_definition )shared float g_f3LDS[ 2 ] [ @value( samples_per_threadgroup ) ];@end
@piece( image_sample )
return textureLod( vkSampler2D( inputImage, inputSampler ), f2SamplePosition, 0 ).x;
@end
@piece( image_store )
@foreach( 4, iPixel )
imageStore( outputImage, ivec2( i2Center + @iPixel * i2Inc ), vec4( outColour[ @iPixel ], 0, 0, 1.0 ) );@end
@end

263
data/CommonMaterials/Common/GLSL/GaussianBlurBase_cs.glsl

@ -0,0 +1,263 @@
@property( syntax != glslvk )
#version 430
@else
#version 450
@end
//Based on GPUOpen's samples SeparableFilter11
//https://github.com/GPUOpen-LibrariesAndSDKs/SeparableFilter11
//For better understanding, read "Efficient Compute Shader Programming" from Bill Bilodeau
//http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/Efficient%20Compute%20Shader%20Programming.pps
//TL;DR:
// * Each thread works on 4 pixels at a time (for VLIW hardware, i.e. Radeon HD 5000 & 6000 series).
// * 256 pixels per threadgroup. Each threadgroup works on 2 rows of 128 pixels each.
// That means 32x2 threads = 64. 64 threads x 4 pixels per thread = 256
// For this shader to work, several pieces need to be defined:
// data_type (e.g. vec3)
// lds_data_type (e.g. vec3, uint)
// lds_definition
// image_store
// image_sample
// decode_lds (optional, i.e. when lds_data_type != data_type)
// Define the property "downscale" if you're doing a downsample.
// Define "downscale_lq" (must also define downscale) for SLIGHTLY lower quality downscale
// The script uses the template syntax to automatically set the num. of threadgroups
// based on the bound input texture.
// Sampler and source texture; 2D-array variant is selected by texture0_texture_type.
vulkan( layout( ogre_s0 ) uniform sampler inputSampler );
@property( texture0_texture_type == TextureTypes_Type2DArray )
vulkan_layout( ogre_t0 ) uniform texture2DArray inputImage;
@else
vulkan_layout( ogre_t0 ) uniform texture2D inputImage;
@end
// Write-only destination image; the format qualifier is supplied by the uav0_pf_type piece.
layout( vulkan( ogre_u0 ) vk_comma @insertpiece(uav0_pf_type) )
@property( uav0_texture_type == TextureTypes_Type2DArray )
uniform restrict writeonly image2DArray outputImage;
@else
uniform restrict writeonly image2D outputImage;
@end
// 32 = 128 / 4
layout( local_size_x = 32,
local_size_y = 2,
local_size_z = 1 ) in;
// Mirror the local size into template properties; these must stay in sync
// with the layout qualifier above.
@pset( threads_per_group_x, 32 )
@pset( threads_per_group_y, 2 )
@pset( threads_per_group_z, 1 )
// pixelsPerRow = 32 threads * 4 pixels per thread = 128.
@pmul( pixelsPerRow, threads_per_group_x, 4 )
@pset( rowsPerThreadGroup, threads_per_group_y )
@pset( num_thread_groups_z, 1 )
// The dispatch size is derived from the UAV (output) dimensions at the bound LOD.
@set( input_width, uav0_width_with_lod )
@set( input_height, uav0_height_with_lod )
@property( horizontal_pass )
// When downscaling, the filtered axis of the input is twice the output size.
@property( downscale ) @mul( input_width, 2 ) @end
/// Calculate num_thread_groups_
/// num_thread_groups_x = (input_width + pixelsPerRow - 1) / pixelsPerRow
/// num_thread_groups_y = (input_height + rowsPerThreadGroup - 1) / rowsPerThreadGroup
@add( num_thread_groups_x, input_width, pixelsPerRow )
@sub( num_thread_groups_x, 1 )
@div( num_thread_groups_x, pixelsPerRow )
@add( num_thread_groups_y, input_height, rowsPerThreadGroup )
@sub( num_thread_groups_y, 1 )
@div( num_thread_groups_y, rowsPerThreadGroup )
@end @property( !horizontal_pass )
// When downscaling, the filtered axis of the input is twice the output size.
@property( downscale ) @mul( input_height, 2 ) @end
/// Calculate num_thread_groups_
/// num_thread_groups_x = (input_width + rowsPerThreadGroup - 1) / rowsPerThreadGroup
/// num_thread_groups_y = (input_height + pixelsPerRow - 1) / pixelsPerRow
@add( num_thread_groups_x, input_width, rowsPerThreadGroup )
@sub( num_thread_groups_x, 1 )
@div( num_thread_groups_x, rowsPerThreadGroup )
@add( num_thread_groups_y, input_height, pixelsPerRow )
@sub( num_thread_groups_y, 1 )
@div( num_thread_groups_y, pixelsPerRow )
@end
/// shared vec3 g_f3LDS[ 2 ] [ @value( samples_per_threadgroup ) ];
@insertpiece( lds_definition )
// Weights are packed four per vec4: element x lives in c_weights[x / 4][x % 4].
#define C_WEIGHTS( x ) c_weights[(x) >> 2u][(x) & 3u]
// weights_array_count = ( kernel_radius + 1u + 3u ) / 4u
@padd( weights_array_count, kernel_radius, 4 )
@pdiv( weights_array_count, 4 )
// Shader parameters. g_f4OutputSize.xy is compared against pixel coordinates in main()
// and .zw scales texel positions in sampleTex() (presumably 1 / input size - confirm).
vulkan( layout( ogre_P0 ) uniform Params { )
uniform vec4 g_f4OutputSize;
uniform vec4 c_weights[@value( weights_array_count )];
@insertpiece( extra_params )
vulkan( }; )
// Samples the input for the texel at i2Position. The actual fetch and the return
// statement come from the image_sample piece, which reads f2SamplePosition.
// f2Offset is accepted for interface compatibility but is not used here.
@insertpiece( lds_data_type ) sampleTex( ivec2 i2Position, vec2 f2Offset )
{
    // Texel centre scaled by g_f4OutputSize.zw (presumably 1 / input size - confirm).
    vec2 f2SamplePosition = ( vec2( i2Position ) + vec2( 0.5f ) ) * g_f4OutputSize.zw;
    @insertpiece( image_sample )
}
// Filters 4 consecutive pixels (2 outputs when downscale_lq is set) from the shared array.
//   iPixelOffset: index of this thread's first pixel within the row of g_f3LDS
//   iLineOffset:  which of the 2 rows of g_f3LDS to read
//   i2Center:     output coordinate of the first pixel (used by the image_store piece)
//   i2Inc:        output step between consecutive pixels (used by the image_store piece)
// RDI is a 4-entry sliding window over the row: after each tap it is shifted by one
// and refilled with the next sample, so each shared value is decoded once per tap.
void ComputeFilterKernel( int iPixelOffset, int iLineOffset, ivec2 i2Center, ivec2 i2Inc )
{
@property( !downscale_lq )
@insertpiece( data_type ) outColour[ 4 ];
@end @property( downscale_lq )
@insertpiece( data_type ) outColour[ 2 ];
@end
@insertpiece( data_type ) RDI[ 4 ] ;
// Centre tap: samples at offset kernel_radius, weighted by the last weight.
@foreach( 4, iPixel )
RDI[ @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @value( kernel_radius ) + @iPixel ] );@end
@property( !downscale_lq )
@foreach( 4, iPixel )
outColour[ @iPixel ] = RDI[ @iPixel ] * C_WEIGHTS( @value( kernel_radius ) );@end
@end @property( downscale_lq )
@foreach( 2, iPixel )
outColour[ @iPixel ] = RDI[ @iPixel * 2 ] * C_WEIGHTS( @value( kernel_radius ) );@end
@end
// Prime the window with the leftmost taps, then advance past them.
@foreach( 4, iPixel )
RDI[ @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @iPixel ] );@end
iPixelOffset += 4;
/// Deal with taps to our left.
/// for ( iIteration = 0; iIteration < radius; iIteration += 1 )
@foreach( kernel_radius, iIteration )
@property( !downscale_lq )
@foreach( 4, iPixel )
outColour[ @iPixel ] += RDI[ @iPixel ] * C_WEIGHTS( @iIteration );@end
@end @property( downscale_lq )
@foreach( 2, iPixel )
outColour[ @iPixel ] += RDI[ @iPixel * 2 ] * C_WEIGHTS( @iIteration );@end
@end
@foreach( 3, iPixel )
RDI[ @iPixel ] = RDI[ @iPixel + ( 1 ) ];@end
@foreach( 1, iPixel )
RDI[ 4 - 1 + @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @iIteration + @iPixel ] );@end
@end
// Re-prime the window at the first tap right of centre (offset kernel_radius + 1).
@foreach( 4, iPixel )
RDI[ @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset - 4 + @value( kernel_radius ) + 1 + @iPixel ] );@end
@padd( kernel_radius_plus1, kernel_radius, 1 )
@pmul( kernel_radius2x_plus1, kernel_radius, 2 )
@padd( kernel_radius2x_plus1, 1 )
@pmul( kernel_radius2x, kernel_radius, 2 )
/// Deal with taps to our right.
/// for ( iIteration = radius + 1; iIteration < ( radius * 2 + 1 ); iIteration += 1 )
// The weight index is mirrored (2 * radius - iteration) so the kernel is symmetric.
@foreach( kernel_radius2x_plus1, iIteration, kernel_radius_plus1 )
@property( !downscale_lq )
@foreach( 4, iPixel )
outColour[ @iPixel ] += RDI[ @iPixel ] * C_WEIGHTS( @value( kernel_radius2x ) - @iIteration );@end
@end @property( downscale_lq )
@foreach( 2, iPixel )
outColour[ @iPixel ] += RDI[ @iPixel * 2 ] * C_WEIGHTS( @value( kernel_radius2x ) - @iIteration );@end
@end
@foreach( 3, iPixel )
RDI[ @iPixel ] = RDI[ @iPixel + ( 1 ) ];@end
@foreach( 1, iPixel )
RDI[ 4 - 1 + @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @iIteration + @iPixel ] );@end
@end
/*
foreach( 4, iPixel )
imageStore( outputImage, ivec2( i2Center + iPixel * i2Inc ), vec4( outColour[ iPixel ], 1.0 ) );end
*/
// The store itself is supplied by the including shader through the image_store piece.
@insertpiece( image_store )
}
// Entry point. Two phases separated by a barrier:
//   1. Every thread loads samples_per_thread samples of its row into g_f3LDS; the
//      first few threads also load the leftover samples at the end of the row.
//   2. Every thread whose first pixel lies inside g_f4OutputSize filters 4 pixels
//      through ComputeFilterKernel().
// The horizontal and vertical variants are identical with x and y swapped.
void main()
{
/// samples_per_threadgroup = 128 + ( ( kernel_radius * 2 + 1 ) - 1 )
/// samples_per_thread = ( 128 + ( ( kernel_radius * 2 + 1 ) - 1 ) ) / ( 128 / 4 )
@padd( samples_per_threadgroup, 127, kernel_radius2x_plus1 )
@pdiv( samples_per_thread, samples_per_threadgroup, 32 )
@property( horizontal_pass )
int iSampleOffset = int( gl_LocalInvocationID.x * @value( samples_per_thread ) );
int iLineOffset = int( gl_LocalInvocationID.y );
// Group origin: 128 pixels per group along x (shifted left by the kernel radius
// so the left-hand taps are available), 2 rows per group along y.
ivec2 i2GroupCoord = ivec2( ( gl_WorkGroupID.x << 7u ) - @value( kernel_radius )u, gl_WorkGroupID.y << 1u );
ivec2 i2Coord = ivec2( i2GroupCoord.x + iSampleOffset, i2GroupCoord.y );
@foreach( samples_per_thread, i )
g_f3LDS[ iLineOffset ][ iSampleOffset + @i ] = sampleTex( i2Coord + ivec2( @i, gl_LocalInvocationID.y ) , vec2( 0.5f, 0.0f ) );@end
// Remainder of the integer division above: filled from the end of the row backwards.
if( gl_LocalInvocationID.x < @value( samples_per_threadgroup )u - 32u * @value( samples_per_thread )u )
{
g_f3LDS[ iLineOffset ][ @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x ] =
sampleTex( i2GroupCoord + ivec2( @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x, gl_LocalInvocationID.y ), vec2( 0.5f, 0.0f ) );
}
//memoryBarrierShared ensures our write is visible to everyone else (must be done BEFORE the barrier)
//barrier ensures every thread's execution reached here.
memoryBarrierShared();
barrier();
int iPixelOffset = int( gl_LocalInvocationID.x << 2u ); //gl_LocalInvocationID.x * 4u
i2Coord = ivec2( i2GroupCoord.x + iPixelOffset, i2GroupCoord.y );
// Undo the kernel-radius shift applied to the group origin.
i2Coord.x += @value( kernel_radius );
if( i2Coord.x < int(g_f4OutputSize.x) )
{
ivec2 i2Center = i2Coord + ivec2( 0, gl_LocalInvocationID.y );
ivec2 i2Inc = ivec2 ( 1, 0 );
@property( downscale )
// Output is half the input size along the filtered axis.
i2Center.x = int( uint( i2Center.x ) >> 1u );
@end
ComputeFilterKernel( iPixelOffset, iLineOffset, i2Center, i2Inc );
}
@end @property( !horizontal_pass )
int iSampleOffset = int( gl_LocalInvocationID.x * @value( samples_per_thread ) );
int iLineOffset = int( gl_LocalInvocationID.y );
// Group origin: 2 columns per group along x, 128 pixels per group along y
// (shifted up by the kernel radius).
ivec2 i2GroupCoord = ivec2( gl_WorkGroupID.x << 1u, ( gl_WorkGroupID.y << 7u ) - @value( kernel_radius )u );
ivec2 i2Coord = ivec2( i2GroupCoord.x, i2GroupCoord.y + iSampleOffset );
@foreach( samples_per_thread, i )
g_f3LDS[ iLineOffset ][ iSampleOffset + @i ] = sampleTex( i2Coord + ivec2( gl_LocalInvocationID.y, @i ) , vec2( 0.0f, 0.5f ) );@end
// Remainder of the integer division above: filled from the end of the column backwards.
if( gl_LocalInvocationID.x < @value( samples_per_threadgroup )u - 32u * @value( samples_per_thread )u )
{
g_f3LDS[ iLineOffset ][ @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x ] =
sampleTex( i2GroupCoord + ivec2( gl_LocalInvocationID.y, @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x ), vec2( 0.0f, 0.5f ) );
}
//memoryBarrierShared ensures our write is visible to everyone else (must be done BEFORE the barrier)
//barrier ensures every thread's execution reached here.
memoryBarrierShared();
barrier();
int iPixelOffset = int( gl_LocalInvocationID.x << 2u ); //gl_LocalInvocationID.x * 4u
i2Coord = ivec2( i2GroupCoord.x, i2GroupCoord.y + iPixelOffset );
// Undo the kernel-radius shift applied to the group origin.
i2Coord.y += @value( kernel_radius );
if( i2Coord.y < int(g_f4OutputSize.y) )
{
ivec2 i2Center = i2Coord + ivec2( gl_LocalInvocationID.y, 0 );
ivec2 i2Inc = ivec2 ( 0, 1 );
@property( downscale )
// Output is half the input size along the filtered axis.
i2Center.y = int( uint( i2Center.y ) >> 1u );
@end
ComputeFilterKernel( iPixelOffset, iLineOffset, i2Center, i2Inc );
}
@end
}

285
data/CommonMaterials/Common/GLSL/GaussianBlurLogFilterBase_cs.glsl

@ -0,0 +1,285 @@
@property( syntax != glslvk )
#version 430
@else
#version 450
@end
//See GaussianBlurBase_cs for the original.
//This is a derived version which is used for filtering ESM (Exponential Shadow Maps).
//Normally ESM is in exponential space: exp( K * linearSpaceDepth );
//Filtering should be done in that space.
//However because of precision reasons, we store linearSpaceDepth instead. In order to perform
//correct filtering, we use the following formula:
// exp( filteredDepth ) = w0 * exp( d0 ) + w1 * exp( d1 ) + w2 * exp( d2 ) + ...
//
//But this is not precision friendly. So we do instead:
// = w0 * exp( d0 ) + w1 * exp( d1 ) + w2 * exp( d2 )
// = exp( d0 ) * ( w0 + w1 * exp( d1 ) / exp( d0 ) + w2 * exp( d2 ) / exp( d0 ) )
// = exp( d0 ) * ( w0 + w1 * exp( d1 - d0 ) + w2 * exp( d2 - d0 ) )
// = exp( d0 ) * exp( log( w0 + w1 * exp( d1 - d0 ) + w2 * exp( d2 - d0 ) ) )
// = exp( d0 + log( w0 + w1 * exp( d1 - d0 ) + w2 * exp( d2 - d0 ) ) )
// exp( filteredDepth ) = exp( d0 + log( w0 + w1 * exp( d1 - d0 ) + w2 * exp( d2 - d0 ) ) )
//Almost final formula:
// filteredDepth = d0 + log( w0 + w1 * exp( d1 - d0 ) + w2 * exp( d2 - d0 ) )
//
//The formula is actually:
// exp( K * filteredDepth ) = w0 * exp( K * d0 ) + w1 * exp( K * d1 ) + w2 * exp( K * d2 ) + ...
//Final formula:
// = d0 + log( w0 + w1 * exp( K * (d1 - d0) ) + w2 * exp( K * (d2 - d0) ) ) / K
//Like in the original filter:
// * Each thread works on 4 pixels at a time (for VLIW hardware, i.e. Radeon HD 5000 & 6000 series).
// * 256 pixels per threadgroup. Each threadgroup works on 2 rows of 128 pixels each.
// That means 32x2 threads = 64. 64 threads x 4 pixels per thread = 256
// For this shader to work, several pieces need to be defined:
// data_type (e.g. vec3)
// lds_data_type (e.g. vec3, uint)
// lds_definition
// image_store
// image_sample
// decode_lds (optional, i.e. when lds_data_type != data_type)
// Define the property "downscale" if you're doing a downsample.
// Define "downscale_lq" (must also define downscale) for SLIGHTLY lower quality downscale
// The script uses the template syntax to automatically set the num. of threadgroups
// based on the bound input texture.
// Sampler, source texture and write-only destination image (2D only in this variant).
vulkan( layout( ogre_s0 ) uniform sampler inputSampler );
vulkan_layout( ogre_t0 ) uniform texture2D inputImage;
layout( vulkan( ogre_u0 ) vk_comma @insertpiece(uav0_pf_type) )
uniform restrict writeonly image2D outputImage;
// 32 = 128 / 4
layout( local_size_x = 32,
local_size_y = 2,
local_size_z = 1 ) in;
// Mirror the local size into template properties; these must stay in sync
// with the layout qualifier above.
@pset( threads_per_group_x, 32 )
@pset( threads_per_group_y, 2 )
@pset( threads_per_group_z, 1 )
// pixelsPerRow = 32 threads * 4 pixels per thread = 128.
@pmul( pixelsPerRow, threads_per_group_x, 4 )
@pset( rowsPerThreadGroup, threads_per_group_y )
@pset( num_thread_groups_z, 1 )
// The dispatch size is derived from the UAV (output) dimensions at the bound LOD.
@set( input_width, uav0_width_with_lod )
@set( input_height, uav0_height_with_lod )
@property( horizontal_pass )
// When downscaling, the filtered axis of the input is twice the output size.
@property( downscale ) @mul( input_width, 2 ) @end
/// Calculate num_thread_groups_
/// num_thread_groups_x = (input_width + pixelsPerRow - 1) / pixelsPerRow
/// num_thread_groups_y = (input_height + rowsPerThreadGroup - 1) / rowsPerThreadGroup
@add( num_thread_groups_x, input_width, pixelsPerRow )
@sub( num_thread_groups_x, 1 )
@div( num_thread_groups_x, pixelsPerRow )
@add( num_thread_groups_y, input_height, rowsPerThreadGroup )
@sub( num_thread_groups_y, 1 )
@div( num_thread_groups_y, rowsPerThreadGroup )
@end @property( !horizontal_pass )
// When downscaling, the filtered axis of the input is twice the output size.
@property( downscale ) @mul( input_height, 2 ) @end
/// Calculate num_thread_groups_
/// num_thread_groups_x = (input_width + rowsPerThreadGroup - 1) / rowsPerThreadGroup
/// num_thread_groups_y = (input_height + pixelsPerRow - 1) / pixelsPerRow
@add( num_thread_groups_x, input_width, rowsPerThreadGroup )
@sub( num_thread_groups_x, 1 )
@div( num_thread_groups_x, rowsPerThreadGroup )
@add( num_thread_groups_y, input_height, pixelsPerRow )
@sub( num_thread_groups_y, 1 )
@div( num_thread_groups_y, pixelsPerRow )
@end
/// shared vec3 g_f3LDS[ 2 ] [ @value( samples_per_threadgroup ) ];
@insertpiece( lds_definition )
// Weights are packed four per vec4: element x lives in c_weights[x / 4][x % 4].
#define C_WEIGHTS( x ) c_weights[(x) >> 2u][(x) & 3u]
// weights_array_count = ( kernel_radius + 1u + 3u ) / 4u
@padd( weights_array_count, kernel_radius, 4 )
@pdiv( weights_array_count, 4 )
// Shader parameters. g_f4OutputSize.xy is compared against pixel coordinates in main()
// and .zw scales texel positions in sampleTex() (presumably 1 / input size - confirm).
vulkan( layout( ogre_P0 ) uniform Params { )
uniform vec4 g_f4OutputSize;
uniform vec4 c_weights[@value( weights_array_count )];
@insertpiece( extra_params )
vulkan( }; )
// Samples the input for the texel at i2Position. The actual fetch and the return
// statement come from the image_sample piece, which reads f2SamplePosition.
// f2Offset is accepted for interface compatibility but is not used here.
@insertpiece( lds_data_type ) sampleTex( ivec2 i2Position, vec2 f2Offset )
{
    // Texel centre scaled by g_f4OutputSize.zw (presumably 1 / input size - confirm).
    vec2 f2SamplePosition = ( vec2( i2Position ) + vec2( 0.5f ) ) * g_f4OutputSize.zw;
    @insertpiece( image_sample )
}
// Log-space variant of the separable filter: for each output pixel it evaluates
//   d0 + log( w0 + sum( wi * exp( K * (di - d0) ) ) ) / K
// where d0 (firstSmpl) is the centre sample and w0 the centre weight.
//   iPixelOffset: index of this thread's first pixel within the row of g_f3LDS
//   iLineOffset:  which of the 2 rows of g_f3LDS to read
//   i2Center:     output coordinate of the first pixel (used by the image_store piece)
//   i2Inc:        output step between consecutive pixels (used by the image_store piece)
// RDI is a 4-entry sliding window over the row, shifted by one sample per tap.
void ComputeFilterKernel( int iPixelOffset, int iLineOffset, ivec2 i2Center, ivec2 i2Inc )
{
@property( !downscale_lq )
@insertpiece( data_type ) outColour[ 4 ];
@insertpiece( data_type ) firstSmpl[ 4 ];
@end @property( downscale_lq )
@insertpiece( data_type ) outColour[ 2 ];
// Only the first 2 entries of firstSmpl are written and read in this branch.
@insertpiece( data_type ) firstSmpl[ 4 ];
@end
@insertpiece( data_type ) RDI[ 4 ] ;
// Centre tap: remember the reference sample d0 and start the sum with w0
// (its exponential term is exp( 0 ) = 1).
@foreach( 4, iPixel )
RDI[ @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @value( kernel_radius ) + @iPixel ] );@end
@property( !downscale_lq )
@foreach( 4, iPixel )
firstSmpl[ @iPixel ].x = RDI[ @iPixel ];
outColour[ @iPixel ].x = C_WEIGHTS( @value( kernel_radius ) );@end
@end @property( downscale_lq )
@foreach( 2, iPixel )
firstSmpl[ @iPixel ].x = RDI[ @iPixel * 2 ];
outColour[ @iPixel ].x = C_WEIGHTS( @value( kernel_radius ) );@end
@end
// Prime the window with the leftmost taps, then advance past them.
@foreach( 4, iPixel )
RDI[ @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @iPixel ] );@end
iPixelOffset += 4;
/// Deal with taps to our left.
/// for ( iIteration = 0; iIteration < radius; iIteration += 1 )
@foreach( kernel_radius, iIteration )
@property( !downscale_lq )
@foreach( 4, iPixel )
outColour[ @iPixel ].x += exp(@value(K)*(RDI[ @iPixel ] - firstSmpl[ @iPixel ].x)) * C_WEIGHTS( @iIteration );@end
@end @property( downscale_lq )
@foreach( 2, iPixel )
outColour[ @iPixel ].x += exp(@value(K)*(RDI[ @iPixel * 2 ] - firstSmpl[ @iPixel ].x)) * C_WEIGHTS( @iIteration );@end
@end
@foreach( 3, iPixel )
RDI[ @iPixel ] = RDI[ @iPixel + ( 1 ) ];@end
@foreach( 1, iPixel )
RDI[ 4 - 1 + @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @iIteration + @iPixel ] );@end
@end
// Re-prime the window at the first tap right of centre (offset kernel_radius + 1).
@foreach( 4, iPixel )
RDI[ @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset - 4 + @value( kernel_radius ) + 1 + @iPixel ] );@end
@padd( kernel_radius_plus1, kernel_radius, 1 )
@pmul( kernel_radius2x_plus1, kernel_radius, 2 )
@padd( kernel_radius2x_plus1, 1 )
@pmul( kernel_radius2x, kernel_radius, 2 )
/// Deal with taps to our right.
/// for ( iIteration = radius + 1; iIteration < ( radius * 2 + 1 ); iIteration += 1 )
// The weight index is mirrored (2 * radius - iteration) so the kernel is symmetric.
@foreach( kernel_radius2x_plus1, iIteration, kernel_radius_plus1 )
@property( !downscale_lq )
@foreach( 4, iPixel )
outColour[ @iPixel ].x += exp(@value(K)*(RDI[ @iPixel ] - firstSmpl[ @iPixel ].x)) * C_WEIGHTS( @value( kernel_radius2x ) - @iIteration );@end
@end @property( downscale_lq )
@foreach( 2, iPixel )
outColour[ @iPixel ].x += exp(@value(K)*(RDI[ @iPixel * 2 ] - firstSmpl[ @iPixel ].x)) * C_WEIGHTS( @value( kernel_radius2x ) - @iIteration );@end
@end
@foreach( 3, iPixel )
RDI[ @iPixel ] = RDI[ @iPixel + ( 1 ) ];@end
@foreach( 1, iPixel )
RDI[ 4 - 1 + @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @iIteration + @iPixel ] );@end
@end
// Convert the accumulated sum back to a depth: d0 + log( sum ) / K.
@property( !downscale_lq )
@foreach( 4, iPixel )
outColour[ @iPixel ] = firstSmpl[ @iPixel ].x + log( outColour[ @iPixel ].x ) / @value(K);@end
@end @property( downscale_lq )
@foreach( 2, iPixel )
outColour[ @iPixel ] = firstSmpl[ @iPixel ].x + log( outColour[ @iPixel ].x ) / @value(K);@end
@end
/*
foreach( 4, iPixel )
imageStore( outputImage, ivec2( i2Center + iPixel * i2Inc ), vec4( outColour[ iPixel ], 1.0 ) );end
*/
// The store itself is supplied by the including shader through the image_store piece.
@insertpiece( image_store )
}
// Entry point. Two phases separated by a barrier:
//   1. Every thread loads samples_per_thread samples of its row into g_f3LDS; the
//      first few threads also load the leftover samples at the end of the row.
//   2. Every thread whose first pixel lies inside g_f4OutputSize filters 4 pixels
//      through ComputeFilterKernel().
// The horizontal and vertical variants are identical with x and y swapped.
void main()
{
/// samples_per_threadgroup = 128 + ( ( kernel_radius * 2 + 1 ) - 1 )
/// samples_per_thread = ( 128 + ( ( kernel_radius * 2 + 1 ) - 1 ) ) / ( 128 / 4 )
@padd( samples_per_threadgroup, 127, kernel_radius2x_plus1 )
@pdiv( samples_per_thread, samples_per_threadgroup, 32 )
@property( horizontal_pass )
int iSampleOffset = int( gl_LocalInvocationID.x * @value( samples_per_thread ) );
int iLineOffset = int( gl_LocalInvocationID.y );
// Group origin: 128 pixels per group along x (shifted left by the kernel radius
// so the left-hand taps are available), 2 rows per group along y.
ivec2 i2GroupCoord = ivec2( ( gl_WorkGroupID.x << 7u ) - @value( kernel_radius )u, gl_WorkGroupID.y << 1u );
ivec2 i2Coord = ivec2( i2GroupCoord.x + iSampleOffset, i2GroupCoord.y );
@foreach( samples_per_thread, i )
g_f3LDS[ iLineOffset ][ iSampleOffset + @i ] = sampleTex( i2Coord + ivec2( @i, gl_LocalInvocationID.y ) , vec2( 0.5f, 0.0f ) );@end
// Remainder of the integer division above: filled from the end of the row backwards.
if( gl_LocalInvocationID.x < @value( samples_per_threadgroup )u - 32u * @value( samples_per_thread )u )
{
g_f3LDS[ iLineOffset ][ @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x ] =
sampleTex( i2GroupCoord + ivec2( @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x, gl_LocalInvocationID.y ), vec2( 0.5f, 0.0f ) );
}
//memoryBarrierShared ensures our write is visible to everyone else (must be done BEFORE the barrier)
//barrier ensures every thread's execution reached here.
memoryBarrierShared();
barrier();
int iPixelOffset = int( gl_LocalInvocationID.x << 2u ); //gl_LocalInvocationID.x * 4u
i2Coord = ivec2( i2GroupCoord.x + iPixelOffset, i2GroupCoord.y );
// Undo the kernel-radius shift applied to the group origin.
i2Coord.x += @value( kernel_radius );
if( i2Coord.x < int(g_f4OutputSize.x) )
{
ivec2 i2Center = i2Coord + ivec2( 0, gl_LocalInvocationID.y );
ivec2 i2Inc = ivec2 ( 1, 0 );
@property( downscale )
// Output is half the input size along the filtered axis.
i2Center.x = int( uint( i2Center.x ) >> 1u );
@end
ComputeFilterKernel( iPixelOffset, iLineOffset, i2Center, i2Inc );
}
@end @property( !horizontal_pass )
int iSampleOffset = int( gl_LocalInvocationID.x * @value( samples_per_thread ) );
int iLineOffset = int( gl_LocalInvocationID.y );
// Group origin: 2 columns per group along x, 128 pixels per group along y
// (shifted up by the kernel radius).
ivec2 i2GroupCoord = ivec2( gl_WorkGroupID.x << 1u, ( gl_WorkGroupID.y << 7u ) - @value( kernel_radius )u );
ivec2 i2Coord = ivec2( i2GroupCoord.x, i2GroupCoord.y + iSampleOffset );
@foreach( samples_per_thread, i )
g_f3LDS[ iLineOffset ][ iSampleOffset + @i ] = sampleTex( i2Coord + ivec2( gl_LocalInvocationID.y, @i ) , vec2( 0.0f, 0.5f ) );@end
// Remainder of the integer division above: filled from the end of the column backwards.
if( gl_LocalInvocationID.x < @value( samples_per_threadgroup )u - 32u * @value( samples_per_thread )u )
{
g_f3LDS[ iLineOffset ][ @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x ] =
sampleTex( i2GroupCoord + ivec2( gl_LocalInvocationID.y, @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x ), vec2( 0.0f, 0.5f ) );
}
//memoryBarrierShared ensures our write is visible to everyone else (must be done BEFORE the barrier)
//barrier ensures every thread's execution reached here.
memoryBarrierShared();
barrier();
int iPixelOffset = int( gl_LocalInvocationID.x << 2u ); //gl_LocalInvocationID.x * 4u
i2Coord = ivec2( i2GroupCoord.x, i2GroupCoord.y + iPixelOffset );
// Undo the kernel-radius shift applied to the group origin.
i2Coord.y += @value( kernel_radius );
if( i2Coord.y < int(g_f4OutputSize.y) )
{
ivec2 i2Center = i2Coord + ivec2( gl_LocalInvocationID.y, 0 );
ivec2 i2Inc = ivec2 ( 0, 1 );
@property( downscale )
// Output is half the input size along the filtered axis.
i2Center.y = int( uint( i2Center.y ) >> 1u );
@end
ComputeFilterKernel( iPixelOffset, iLineOffset, i2Center, i2Inc );
}
@end
}

49
data/CommonMaterials/Common/GLSL/GaussianBlurLogFilter_ps.glsl

@ -0,0 +1,49 @@
#version ogre_glsl_ver_330

vulkan_layout( ogre_t0 ) uniform texture2D tex;

vulkan_layout( location = 0 )
in block
{
    vec2 uv0;
} inPs;

vulkan_layout( location = 0 )
out float fragColour;

in vec4 gl_FragCoord;

// One half of the symmetric kernel: weights[0] is the outermost tap,
// weights[NUM_WEIGHTS-1] the centre tap.
vulkan( layout( ogre_P0 ) uniform Params { )
    uniform float weights[NUM_WEIGHTS];
vulkan( }; )

// One-dimensional log-space blur along (HORIZONTAL_STEP, VERTICAL_STEP).
// Evaluates  ref + log( sum( w_i * exp( K * (d_i - ref) ) ) ) / K,
// where ref is the outermost tap on the negative side; its own term
// reduces to weights[0] because exp( 0 ) = 1.
void main()
{
    ivec2 centre = ivec2( gl_FragCoord.xy );

    // Reference sample: the farthest tap in the negative direction.
    float refDepth = texelFetch( tex, centre - ivec2( HORIZONTAL_STEP * (NUM_WEIGHTS - 1),
                                                      VERTICAL_STEP * (NUM_WEIGHTS - 1) ), 0 ).x;
    float weightedSum = weights[0];

    // Remaining negative-side taps, walking inwards towards the centre.
    for( int tap = NUM_WEIGHTS - 2; tap > 0; --tap )
    {
        float tapDepth = texelFetch( tex, centre - ivec2( HORIZONTAL_STEP * tap,
                                                          VERTICAL_STEP * tap ), 0 ).x;
        weightedSum += exp( K * (tapDepth - refDepth) ) * weights[NUM_WEIGHTS-tap-1];
    }

    // Centre tap.
    float centreDepth = texelFetch( tex, centre, 0 ).x;
    weightedSum += exp( K * (centreDepth - refDepth) ) * weights[NUM_WEIGHTS-1];

    // Positive-side taps, walking outwards from the centre.
    for( int tap = 1; tap < NUM_WEIGHTS; ++tap )
    {
        float tapDepth = texelFetch( tex, centre + ivec2( HORIZONTAL_STEP * tap,
                                                          VERTICAL_STEP * tap ), 0 ).x;
        weightedSum += exp( K * (tapDepth - refDepth) ) * weights[NUM_WEIGHTS-tap-1];
    }

    fragColour = refDepth + log( weightedSum ) / K;
}

9
data/CommonMaterials/Common/GLSL/HiddenAreaMeshVr_ps.glsl

@ -0,0 +1,9 @@
#version ogre_glsl_ver_330

vulkan_layout( location = 0 )
out vec4 fragColour;

// Outputs an all-zero colour (including alpha) for every covered fragment.
void main()
{
    fragColour = vec4( 0.0, 0.0, 0.0, 0.0 );
}

31
data/CommonMaterials/Common/GLSL/HiddenAreaMeshVr_vs.glsl

@ -0,0 +1,31 @@
#version ogre_glsl_ver_330
#extension GL_ARB_shader_viewport_layer_array : require

// HLSL-style aliases used by the shader body.
#define float2 vec2
#define float3 vec3
#define float4 vec4
#define float4x4 mat4
#define mul( x, y ) ((x) * (y))

vulkan( layout( ogre_P0 ) uniform Params { )
    uniform float4x4 projectionMatrix;
    uniform float2 rsDepthRange;
vulkan( }; )

// xy: position to project; z: index of the viewport the vertex is routed to.
vulkan_layout( OGRE_POSITION ) in vec4 vertex;

vulkan_layout( location = 0 )
out gl_PerVertex
{
    vec4 gl_Position;
};

// Projects the 2D vertex, pins its depth to rsDepthRange.x with w = 1, and
// selects the output viewport from vertex.z.
void main()
{
    float4 projected = mul( projectionMatrix, float4( vertex.xy, 0.0f, 1.0f ) );

    gl_Position = float4( projected.xy, rsDepthRange.x, 1.0f );
    gl_ViewportIndex = int( vertex.z );
}

40
data/CommonMaterials/Common/GLSL/MipmapsGaussianBlur_cs.glsl

@ -0,0 +1,40 @@
//Based on GPUOpen's samples SeparableFilter11
//https://github.com/GPUOpen-LibrariesAndSDKs/SeparableFilter11
//For better understanding, read "Efficient Compute Shader Programming" from Bill Bilodeau
//http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/Efficient%20Compute%20Shader%20Programming.pps
//TL;DR:
// * Each thread works on 4 pixels at a time (for VLIW hardware, i.e. Radeon HD 5000 & 6000 series).
// * 256 pixels per threadgroup. Each threadgroup works on 2 rows of 128 pixels each.
// That means 32x2 threads = 64. 64 threads x 4 pixels per thread = 256
// Template pieces for an RGB (vec3) blur. data_type and lds_data_type are both vec3,
// so no decode step is needed between the shared array and the accumulators.
@piece( data_type )vec3@end
@piece( lds_data_type )vec3@end
// Shared array: 2 rows of samples_per_threadgroup vec3 values.
@piece( lds_definition )
shared vec3 g_f3LDS[ 2 ] [ @value( samples_per_threadgroup ) ];
@end
// Extra member appended to the Params block: the source mip level to read.
@piece( extra_params )
uniform float srcLodIdx;
@end
// Body of sampleTex(): reads RGB at mip srcLodIdx from f2SamplePosition.
@piece( image_sample )
return textureLod( vkSampler2D( inputImage, inputSampler ), f2SamplePosition, srcLodIdx ).xyz;
@end
//Overwrite these so that num_thread_groups gets correctly calculated by accounting LOD.
@pset( texture0_width, width_with_lod )
@pset( texture0_height, height_with_lod )
// NOTE(review): downscale_lq is forced to 1 here, so presumably only the first branch
// of the image_store piece below is ever emitted - confirm.
@pset( downscale_lq, 1 )
// Tail of ComputeFilterKernel(): writes 2 output pixels per thread. In the low-quality
// path the 2 accumulators are stored directly; otherwise adjacent pairs of the 4
// accumulators are averaged.
@piece( image_store )
@property( downscale_lq )
@foreach( 2, iPixel )
imageStore( outputImage, ivec2( i2Center + @iPixel * i2Inc ), vec4( outColour[ @iPixel ], 1.0 ) );@end
@end @property( !downscale_lq )
@foreach( 2, iPixel )
imageStore( outputImage, ivec2( i2Center + @iPixel * i2Inc ),
vec4( (outColour[ @iPixel * 2 ] + outColour[ @iPixel * 2 + 1 ]) * 0.5, 1.0 ) );@end
@end
@end

52
data/CommonMaterials/Common/GLSL/PccDepthCompressor_ps.glsl

@ -0,0 +1,52 @@
#version ogre_glsl_ver_330
// GLSL front end for PccDepthCompressor_ps.any: declares the resources and the
// HLSL-style aliases, then includes the shared file for the actual logic.
#define float2 vec2
#define float3 vec3
#define float4 vec4
#define float3x3 mat3
#define mul( x, y ) ((x) * (y))
#define INLINE
// Under Vulkan, textures and samplers are separate objects and must be combined
// at the call site; otherwise the sampler argument is ignored.
#ifdef VULKAN
#define OGRE_Sample( tex, sampler, uv ) texture( sampler2D( tex, sampler ), uv )
#else
#define OGRE_Sample( tex, sampler, uv ) texture( tex, uv )
#endif
vulkan_layout( ogre_t0 ) uniform texture2D depthTexture;
vulkan( layout( ogre_s0 ) uniform sampler pointSampler );
vulkan( layout( ogre_P0 ) uniform Params { )
uniform float2 projectionParams;
uniform float3 probeShapeHalfSize;
uniform float3 cameraPosLS;
uniform float3x3 viewSpaceToProbeLocalSpace;
vulkan( }; )
// p_-prefixed aliases for the uniforms (presumably the names the shared .any file
// uses - confirm against that file).
#define p_projectionParams projectionParams
#define p_probeShapeHalfSize probeShapeHalfSize
#define p_cameraPosLS cameraPosLS
#define p_viewSpaceToProbeLocalSpace viewSpaceToProbeLocalSpace
vulkan_layout( location = 0 )
in block
{
vec2 uv0;
vec3 cameraDir;
} inPs;
// First inclusion with HEADER defined (presumably emits only file-scope
// declarations - confirm against the .any file).
#define HEADER
#include "PccDepthCompressor_ps.any"
#undef HEADER
layout( location = 0 ) out float4 fragColour;
void main()
{
// Second inclusion without HEADER: this is the shader body. It is expected to
// define the 'alpha' value written below.
#include "PccDepthCompressor_ps.any"
//RGB writes should be masked off
fragColour = float4( 0, 0, 0, alpha );
}

28
data/CommonMaterials/Common/GLSL/QuadCameraDirNoUV_vs.glsl

@ -0,0 +1,28 @@
#version ogre_glsl_ver_330
// Per-vertex inputs: 2D position and a vec3 carried in the normal slot.
vulkan_layout( OGRE_POSITION ) in vec2 vertex;
vulkan_layout( OGRE_NORMAL ) in vec3 normal;
// rsDepthRange supplies the output depth; worldViewProj transforms the vertex position.
vulkan( layout( ogre_P0 ) uniform Params { )
uniform vec2 rsDepthRange;
uniform mat4 worldViewProj;
vulkan( }; )
out gl_PerVertex
{
vec4 gl_Position;
};
// Interpolated output passed to the pixel shader.
vulkan_layout( location = 0 )
out block
{
vec3 cameraDir;
} outVs;
void main()
{
gl_Position.xy = (worldViewProj * vec4( vertex.xy, 0, 1.0f )).xy;
gl_Position.z = rsDepthRange.y;