Browse Source

Ogre now successfully creates window

SDLAttempt
Macoy Madson 3 years ago
parent
commit
d2d0d5f103
  1. 5
      .gitignore
  2. 4
      Build_Debug.sh
  3. 12
      ReadMe.org
  4. 90
      src/OgreInitialize.cpp
  5. 8
      test/CopyOgreData.sh
  6. 1
      test/Jamfile
  7. 47
      test/data/CommonMaterials/Common/Any/PccDepthCompressor_ps.any
  8. 225
      test/data/CommonMaterials/Common/Copyback.material
  9. 65
      test/data/CommonMaterials/Common/DPM.material
  10. 134
      test/data/CommonMaterials/Common/DPSM.material
  11. 67
      test/data/CommonMaterials/Common/DepthUtils.material
  12. 117
      test/data/CommonMaterials/Common/EsmGaussianBlurLogFilter.material
  13. 101
      test/data/CommonMaterials/Common/EsmGaussianBlurLogFilter.material.json
  14. 18
      test/data/CommonMaterials/Common/GLSL/Copyback_1xFP32_ps.glsl
  15. 22
      test/data/CommonMaterials/Common/GLSL/Copyback_4xFP32_2DArray_ps.glsl
  16. 18
      test/data/CommonMaterials/Common/GLSL/Copyback_4xFP32_ps.glsl
  17. 28
      test/data/CommonMaterials/Common/GLSL/CubeToDpm_4xFP16_ps.glsl
  18. 37
      test/data/CommonMaterials/Common/GLSL/CubeToDpsm_ps.glsl
  19. 17
      test/data/CommonMaterials/Common/GLSL/DepthDownscaleMax_ps.glsl
  20. 22
      test/data/CommonMaterials/Common/GLSL/EsmGaussianBlurLogFilter_cs.glsl
  21. 263
      test/data/CommonMaterials/Common/GLSL/GaussianBlurBase_cs.glsl
  22. 285
      test/data/CommonMaterials/Common/GLSL/GaussianBlurLogFilterBase_cs.glsl
  23. 49
      test/data/CommonMaterials/Common/GLSL/GaussianBlurLogFilter_ps.glsl
  24. 9
      test/data/CommonMaterials/Common/GLSL/HiddenAreaMeshVr_ps.glsl
  25. 31
      test/data/CommonMaterials/Common/GLSL/HiddenAreaMeshVr_vs.glsl
  26. 40
      test/data/CommonMaterials/Common/GLSL/MipmapsGaussianBlur_cs.glsl
  27. 52
      test/data/CommonMaterials/Common/GLSL/PccDepthCompressor_ps.glsl
  28. 28
      test/data/CommonMaterials/Common/GLSL/QuadCameraDirNoUV_vs.glsl
  29. 28
      test/data/CommonMaterials/Common/GLSL/QuadCameraDir_vs.glsl
  30. 25
      test/data/CommonMaterials/Common/GLSL/Quad_vs.glsl
  31. 42
      test/data/CommonMaterials/Common/GLSL/RadialDensityMask_ps.glsl
  32. 28
      test/data/CommonMaterials/Common/GLSL/RadialDensityMask_vs.glsl
  33. 19
      test/data/CommonMaterials/Common/GLSL/Resolve_1xFP32_Subsample0_ps.glsl
  34. 20
      test/data/CommonMaterials/Common/GLSL/SkyCubemap_ps.glsl
  35. 37
      test/data/CommonMaterials/Common/GLSL/SkyEquirectangular_ps.glsl
  36. 35
      test/data/CommonMaterials/Common/GLSLES/CubeToDpsm_ps.glsles
  37. 25
      test/data/CommonMaterials/Common/GLSLES/QuadCameraDir_vs.glsles
  38. 22
      test/data/CommonMaterials/Common/GLSLES/Quad_vs.glsles
  39. 8
      test/data/CommonMaterials/Common/HLSL/Copyback_1xFP32_ps.hlsl
  40. 8
      test/data/CommonMaterials/Common/HLSL/Copyback_4xFP32_2DArray_ps.hlsl
  41. 8
      test/data/CommonMaterials/Common/HLSL/Copyback_4xFP32_ps.hlsl
  42. 25
      test/data/CommonMaterials/Common/HLSL/CubeToDpm_4xFP16_ps.hlsl
  43. 30
      test/data/CommonMaterials/Common/HLSL/CubeToDpsm_ps.hlsl
  44. 21
      test/data/CommonMaterials/Common/HLSL/DepthDownscaleMax_ps.hlsl
  45. 22
      test/data/CommonMaterials/Common/HLSL/EsmGaussianBlurLogFilter_cs.hlsl
  46. 242
      test/data/CommonMaterials/Common/HLSL/GaussianBlurBase_cs.hlsl
  47. 263
      test/data/CommonMaterials/Common/HLSL/GaussianBlurLogFilterBase_cs.hlsl
  48. 43
      test/data/CommonMaterials/Common/HLSL/GaussianBlurLogFilter_ps.hlsl
  49. 4
      test/data/CommonMaterials/Common/HLSL/HiddenAreaMeshVr_ps.hlsl
  50. 27
      test/data/CommonMaterials/Common/HLSL/HiddenAreaMeshVr_vs.hlsl
  51. 37
      test/data/CommonMaterials/Common/HLSL/MipmapsGaussianBlur_cs.hlsl
  52. 39
      test/data/CommonMaterials/Common/HLSL/PccDepthCompressor_ps.hlsl
  53. 31
      test/data/CommonMaterials/Common/HLSL/QuadCameraDirNoUV_vs.hlsl
  54. 31
      test/data/CommonMaterials/Common/HLSL/QuadCameraDir_vs.hlsl
  55. 25
      test/data/CommonMaterials/Common/HLSL/Quad_vs.hlsl
  56. 35
      test/data/CommonMaterials/Common/HLSL/RadialDensityMask_ps.hlsl
  57. 31
      test/data/CommonMaterials/Common/HLSL/RadialDensityMask_vs.hlsl
  58. 16
      test/data/CommonMaterials/Common/HLSL/Resolve_1xFP32_Subsample0_ps.hlsl
  59. 16
      test/data/CommonMaterials/Common/HLSL/SkyCubemap_ps.hlsl
  60. 25
      test/data/CommonMaterials/Common/HLSL/SkyEquirectangular_ps.hlsl
  61. 95
      test/data/CommonMaterials/Common/HiddenAreaMeshVr.material
  62. 17
      test/data/CommonMaterials/Common/Metal/Copyback_1xFP32_ps.metal
  63. 18
      test/data/CommonMaterials/Common/Metal/Copyback_4xFP32_2DArray_ps.metal
  64. 17
      test/data/CommonMaterials/Common/Metal/Copyback_4xFP32_ps.metal
  65. 28
      test/data/CommonMaterials/Common/Metal/CubeToDpm_4xFP16_ps.metal
  66. 37
      test/data/CommonMaterials/Common/Metal/CubeToDpsm_ps.metal
  67. 33
      test/data/CommonMaterials/Common/Metal/DepthDownscaleMax_ps.metal
  68. 25
      test/data/CommonMaterials/Common/Metal/EsmGaussianBlurLogFilter_cs.metal
  69. 269
      test/data/CommonMaterials/Common/Metal/GaussianBlurBase_cs.metal
  70. 284
      test/data/CommonMaterials/Common/Metal/GaussianBlurLogFilterBase_cs.metal
  71. 51
      test/data/CommonMaterials/Common/Metal/GaussianBlurLogFilter_ps.metal
  72. 7
      test/data/CommonMaterials/Common/Metal/HiddenAreaMeshVr_ps.metal
  73. 35
      test/data/CommonMaterials/Common/Metal/HiddenAreaMeshVr_vs.metal
  74. 42
      test/data/CommonMaterials/Common/Metal/MipmapsGaussianBlur_cs.metal
  75. 45
      test/data/CommonMaterials/Common/Metal/PccDepthCompressor_ps.metal
  76. 37
      test/data/CommonMaterials/Common/Metal/QuadCameraDirNoUV_vs.metal
  77. 32
      test/data/CommonMaterials/Common/Metal/QuadCameraDir_vs.metal
  78. 28
      test/data/CommonMaterials/Common/Metal/Quad_vs.metal
  79. 41
      test/data/CommonMaterials/Common/Metal/RadialDensityMask_ps.metal
  80. 38
      test/data/CommonMaterials/Common/Metal/RadialDensityMask_vs.metal
  81. 17
      test/data/CommonMaterials/Common/Metal/Resolve_1xFP32_Subsample0_ps.metal
  82. 18
      test/data/CommonMaterials/Common/Metal/SkyCubemap_ps.metal
  83. 27
      test/data/CommonMaterials/Common/Metal/SkyEquirectangular_ps.metal
  84. 87
      test/data/CommonMaterials/Common/Mipmaps.material.json
  85. 74
      test/data/CommonMaterials/Common/PccDepthCompressor.material
  86. 120
      test/data/CommonMaterials/Common/Quad.program
  87. 90
      test/data/CommonMaterials/Common/RadialDensityMask.material
  88. 137
      test/data/CommonMaterials/Common/Sky.material
  89. BIN
      test/data/CommonMaterials/Common/brtfLutDfg.dds
  90. BIN
      test/data/CommonMaterials/Common/ltcMatrix0.dds
  91. BIN
      test/data/CommonMaterials/Common/ltcMatrix1.dds
  92. 115
      test/data/Hlms/Common/Any/Cubemap_piece_all.any
  93. 17
      test/data/Hlms/Common/Any/DualParaboloid_piece_ps.any
  94. 10
      test/data/Hlms/Common/Any/ReverseDepthHelpers_piece_ps.any
  95. 44
      test/data/Hlms/Common/Any/ShadowCaster_piece_ps.any
  96. 47
      test/data/Hlms/Common/Any/ShadowCaster_piece_vs.any
  97. 7
      test/data/Hlms/Common/Any/UnpackHelpers_piece_all.any
  98. 224
      test/data/Hlms/Common/GLSL/CrossPlatformSettings_piece_all.glsl
  99. 104
      test/data/Hlms/Common/GLSL/Matrix_piece_all.glsl
  100. 55
      test/data/Hlms/Common/GLSL/QuaternionCode_piece_all.glsl

5
.gitignore

@ -36,4 +36,7 @@
cakelisp_cache/
bin/
lib/
lib/
test/ogreApp
test/output/

4
Build_Debug.sh

@ -3,4 +3,6 @@
./Dependencies/cakelisp/bin/cakelisp src/OgreCore.cake \
&& jam -j4 -sDEBUG_BUILD=true libGameLib.a
./Dependencies/cakelisp/bin/cakelisp test/OgreApp.cake && cd test && jam -j4 -sDEBUG_BUILD=true
# TODO: Figure out how to remove clean by teaching jam about libGameLib.a (dyn link instead?)
./Dependencies/cakelisp/bin/cakelisp test/OgreApp.cake && cd test \
&& jam clean && jam -j4 -sDEBUG_BUILD=true

12
ReadMe.org

@ -3,8 +3,18 @@
GameLib is my library for making games. It is the successor to [[https://macoy.me/code/macoy/base2.0][base2.0]].
* Setup
Clone the repository and its dependencies:
#+BEGIN_SRC sh
git clone https://macoy.me/code/macoy/gamelib.git
cd gamelib
git submodule update --init --recursive
#+END_SRC
Build dependencies:
#+BEGIN_SRC sh
./BuildDependencies_Debug.sh
#+END_SRC
Build:
#+BEGIN_SRC sh
./Build_Debug.sh
#+END_SRC

90
src/OgreInitialize.cpp

@ -1,3 +1,5 @@
#include <stdio.h>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Woverloaded-virtual"
#include "OgreArchiveManager.h"
@ -121,7 +123,7 @@ void OgreInitialize()
using namespace Ogre;
const String pluginsFolder = "./data/";
const String writeAccessFolder = "./";
const String writeAccessFolder = "./output/";
#ifndef OGRE_STATIC_LIB
#if OGRE_DEBUG_MODE
@ -134,41 +136,58 @@ void OgreInitialize()
writeAccessFolder + "ogre.cfg", //
writeAccessFolder + "Ogre.log");
// This allows the user to configure the graphics. It's damn annoying during dev though
// TODO: Make this return false and quit the app
if (!root->showConfigDialog())
return; // false
// if (!root->showConfigDialog())
// return; // false
// Initialize Root
root->getRenderSystem()->setConfigOption("sRGB Gamma Conversion", "Yes");
// Window* window = root->initialise(/*autoCreateWindow=*/true, "Spargus Ogre");
root->initialise(/*autoCreateWindow=*/true);
Ogre::NameValuePairList windowSettings;
// unsigned long winHandle = reinterpret_cast<unsigned long>(win.getBase()->getSystemHandle());
// #ifdef _WIN32
// unsigned long winGlContext = reinterpret_cast<unsigned long>(wglGetCurrentContext());
// windowSettings["externalWindowHandle"] = StringConverter::toString(winHandle);
// windowSettings["externalGLContext"] = StringConverter::toString(winGlContext);
// windowSettings["externalGLControl"] = String("True");
// #else
// // Deprecated. See ogre-next/RenderSystems/GL3Plus/src/windowing/GLX/OgreGLXWindow.cpp:237
// // windowSettings["externalWindowHandle"] = StringConverter::toString(winHandle);
// windowSettings["parentWindowHandle"] = StringConverter::toString(winHandle);
// // sf::Context context;
// // unsigned long activeContextId = (unsigned long)context.getActiveContextId();
// windowSettings["currentGLContext"] = String("True");
// // windowSettings["currentGLContext"] = StringConverter::toString(activeContextId);
// // windowSettings["externalGLControl"] = String("True");
// #endif
int winWidth = 1024;
int winHeight = 1024;
Window* window =
root->createRenderWindow("GameLib", winWidth, winHeight, true, &windowSettings);
// window->_setVisible(true);
RenderSystem* renderSystem = root->getRenderSystemByName("OpenGL 3+ Rendering Subsystem");
if (!renderSystem)
{
printf("Render system not found!\n");
return;
}
// renderSystem->setConfigOption("Display Frequency", "N/A");
renderSystem->setConfigOption("Full Screen", "No");
// renderSystem->setConfigOption("VSync", "Yes");
renderSystem->setConfigOption("Video Mode", "1920 x 1080");
renderSystem->setConfigOption("sRGB Gamma Conversion", "Yes");
root->setRenderSystem(renderSystem);
Window* window = root->initialise(/*autoCreateWindow=*/true, "GameLib");
// Old bad attempt at Ogre within SFML (only partly worked)
{
// Ogre::NameValuePairList windowSettings;
// unsigned long winHandle = reinterpret_cast<unsigned
// long>(win.getBase()->getSystemHandle()); #ifdef _WIN32 unsigned long winGlContext =
// reinterpret_cast<unsigned long>(wglGetCurrentContext());
// windowSettings["externalWindowHandle"] = StringConverter::toString(winHandle);
// windowSettings["externalGLContext"] = StringConverter::toString(winGlContext);
// windowSettings["externalGLControl"] = String("True");
// #else
// // Deprecated. See
// ogre-next/RenderSystems/GL3Plus/src/windowing/GLX/OgreGLXWindow.cpp:237
// // windowSettings["externalWindowHandle"] = StringConverter::toString(winHandle);
// windowSettings["parentWindowHandle"] = StringConverter::toString(winHandle);
// // sf::Context context;
// // unsigned long activeContextId = (unsigned long)context.getActiveContextId();
// windowSettings["currentGLContext"] = String("True");
// // windowSettings["currentGLContext"] = StringConverter::toString(activeContextId);
// // windowSettings["externalGLControl"] = String("True");
// #endif
// int winWidth = 1920;
// int winHeight = 1080;
// Window* window =
// // root->createRenderWindow("GameLib", winWidth, winHeight, true, &windowSettings);
// root->createRenderWindow("GameLib", winWidth, winHeight, true);
// window->_setVisible(true);
}
registerHlms();
@ -189,7 +208,7 @@ void OgreInitialize()
// Setup a basic compositor with a blue clear colour
CompositorManager2* compositorManager = root->getCompositorManager2();
const String workspaceName("Demo Workspace");
const String workspaceName("Main Workspace");
// const IdString definitionNameId = workspaceName;
const ColourValue backgroundColour(0.2f, 0.4f, 0.6f);
compositorManager->createBasicWorkspaceDef(workspaceName, backgroundColour, IdString());
@ -197,6 +216,7 @@ void OgreInitialize()
true);
// Mesh importing
if (false)
{
Ogre::ResourceGroupManager::getSingleton().addResourceLocation("data/Models", "FileSystem",
"Models");
@ -237,7 +257,7 @@ void OgreInitialize()
{
return;
}
// WTF
// WTF - this should work, but it's almost like the root scene node's vtable is wrong
// Ogre::SceneNode* sceneNode = rootSceneNode->createChildSceneNode(Ogre::SCENE_DYNAMIC);
Ogre::SceneNode* sceneNode = static_cast<SceneNode*>(
rootSceneNode->createChild()); // rootSceneNode->createChildSceneNode();

8
test/CopyOgreData.sh

@ -0,0 +1,8 @@
#!/bin/sh
# Mirrors the Ogre-next sample media needed by the test app into ./data:
#   - the Common, Pbs and Unlit Hlms template folders -> data/Hlms/
#   - the Common compositor materials folder          -> data/CommonMaterials
# All paths are relative, so this presumably has to be run from the test/ directory.

# Abort on the first failing command, propagating its exit status.
set -e

ogre_media=../Dependencies/ogre-next/Samples/Media

mkdir -p data/Hlms
mkdir -p data/CommonMaterials

for hlms_dir in Common Pbs Unlit; do
    rsync -av "$ogre_media/Hlms/$hlms_dir" data/Hlms/
done

rsync -av "$ogre_media/2.0/scripts/materials/Common" data/CommonMaterials

1
test/Jamfile

@ -1,3 +1,4 @@
SubDir . ;
# TODO: This should depend on libGamelib such that Jam knows it needs to re-link if Gamelib updates
Main ogreApp : OgreApp.cake.cpp ;

47
test/data/CommonMaterials/Common/Any/PccDepthCompressor_ps.any

@ -0,0 +1,47 @@
//#include "SyntaxHighlightingMisc.h"
#ifdef HEADER
/**
Ray/box exit distance for a box centred at the origin.

Given a start position posLS and a direction reflDirLS, returns the scalar t such that
    posLS + reflDirLS * t
lies on the surface of the box whose half extents are probeShapeHalfSize.
*/
INLINE float findIntersectionDistance( float3 reflDirLS, float3 posLS, float3 probeShapeHalfSize )
{
	//Per-axis reciprocal of the direction, shared by both plane tests
	float3 rcpDirLS = float3( 1.0, 1.0, 1.0 ) / reflDirLS;

	//Distance along the ray to the -halfSize and +halfSize planes of each axis
	float3 tAtNegPlanes = ( -probeShapeHalfSize - posLS ) * rcpDirLS;
	float3 tAtPosPlanes = ( probeShapeHalfSize - posLS ) * rcpDirLS;

	//Per axis keep the larger of the two (negative solutions are of no interest)
	float3 tFarthest = max( tAtPosPlanes.xyz, tAtNegPlanes.xyz );

	//The nearest of those three candidates is the result
	return min( min( tFarthest.x, tFarthest.y ), tFarthest.z );
}
#else
//Body piece: computes 'alpha', the ratio between the actual distance to the surface seen at
//this pixel and the distance to the probe-shape box along the same direction, remapped to [0; 1].
//Sample depthTexture at this fragment's UV and keep the first channel
float fDepth = OGRE_Sample( depthTexture, pointSampler, inPs.uv0 ).x;
//Derive a linear depth from the sampled value using p_projectionParams.x / .y
float linearDepth = p_projectionParams.y / (fDepth - p_projectionParams.x);
//Scale the interpolated camera direction by that depth to get a position
float3 viewSpacePosition = inPs.cameraDir * linearDepth;
//Split the position into its length (fDist) and unit direction (probeToPosDir)
float fDist = length( viewSpacePosition.xyz );
float3 probeToPosDir = viewSpacePosition / fDist;
//Bring the direction into the probe's local space before intersecting with the box
probeToPosDir = mul( p_viewSpaceToProbeLocalSpace, probeToPosDir );
//Distance from p_cameraPosLS to the probe-shape box along that direction
float fApproxDist = findIntersectionDistance( probeToPosDir, p_cameraPosLS, p_probeShapeHalfSize );
//We can't store fDist directly because we have limited precision (often 8 bits)
//Thus we store it in terms of
// fApproxDist * alpha = fDist;
//During render we'll know fApproxDist and alpha, but want to know fDist
//We also know alpha >= 0
//For precision issues and because it's good enough, we force alpha <= 2.0
float alpha = fDist / fApproxDist;
//Halve so that alpha in [0; 2] lands in [0; 1], then clamp anything larger to 1
alpha *= 0.5;
alpha = min( alpha, 1.0 );
#endif

225
test/data/CommonMaterials/Common/Copyback.material

@ -0,0 +1,225 @@
fragment_program Ogre/Copy/4xFP32_ps_HLSL hlsl
{
source Copyback_4xFP32_ps.hlsl
entry_point main
target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3
}
fragment_program Ogre/Copy/4xFP32_ps_GLSL glsl
{
source Copyback_4xFP32_ps.glsl
default_params { param_named tex int 0 }
}
fragment_program Ogre/Copy/4xFP32_ps_VK glslvk
{
source Copyback_4xFP32_ps.glsl
}
fragment_program Ogre/Copy/4xFP32_ps_Metal metal
{
source Copyback_4xFP32_ps.metal
shader_reflection_pair_hint Ogre/Compositor/Quad_vs
}
fragment_program Ogre/Copy/4xFP32_ps unified
{
delegate Ogre/Copy/4xFP32_ps_GLSL
delegate Ogre/Copy/4xFP32_ps_VK
delegate Ogre/Copy/4xFP32_ps_HLSL
delegate Ogre/Copy/4xFP32_ps_Metal
}
// Material pairing the Ogre/Compositor/Quad_vs vertex program with the
// Ogre/Copy/4xFP32_ps fragment program, reading from a single texture unit.
material Ogre/Copy/4xFP32
{
technique
{
pass
{
// No depth test, no depth write and no culling for this pass.
depth_check off
depth_write off
cull_hardware none
vertex_program_ref Ogre/Compositor/Quad_vs
{
}
fragment_program_ref Ogre/Copy/4xFP32_ps
{
}
// Source texture: unfiltered, with clamped addressing.
texture_unit
{
filtering none
tex_address_mode clamp
}
}
}
}
fragment_program Ogre/Copy/4xFP32_2DArray_ps_HLSL hlsl
{
source Copyback_4xFP32_2DArray_ps.hlsl
entry_point main
target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3
}
fragment_program Ogre/Copy/4xFP32_2DArray_ps_GLSL glsl
{
source Copyback_4xFP32_2DArray_ps.glsl
default_params { param_named tex int 0 }
}
fragment_program Ogre/Copy/4xFP32_2DArray_ps_VK glslvk
{
source Copyback_4xFP32_2DArray_ps.glsl
}
fragment_program Ogre/Copy/4xFP32_2DArray_ps_Metal metal
{
source Copyback_4xFP32_2DArray_ps.metal
shader_reflection_pair_hint Ogre/Compositor/Quad_vs
}
fragment_program Ogre/Copy/4xFP32_2DArray_ps unified
{
delegate Ogre/Copy/4xFP32_2DArray_ps_GLSL
delegate Ogre/Copy/4xFP32_2DArray_ps_VK
delegate Ogre/Copy/4xFP32_2DArray_ps_HLSL
delegate Ogre/Copy/4xFP32_2DArray_ps_Metal
}
material Ogre/Copy/4xFP32_2DArray
{
technique
{
pass
{
depth_check off
depth_write off
cull_hardware none
vertex_program_ref Ogre/Compositor/Quad_vs
{
}
fragment_program_ref Ogre/Copy/4xFP32_2DArray_ps
{
param_named sliceIdx float 0
}
texture_unit
{
filtering none
tex_address_mode clamp
}
}
}
}
fragment_program Ogre/Copy/1xFP32_ps_HLSL hlsl
{
source Copyback_1xFP32_ps.hlsl
entry_point main
target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3
}
fragment_program Ogre/Copy/1xFP32_ps_GLSL glsl
{
source Copyback_1xFP32_ps.glsl
default_params { param_named tex int 0 }
}
fragment_program Ogre/Copy/1xFP32_ps_VK glslvk
{
source Copyback_1xFP32_ps.glsl
}
fragment_program Ogre/Copy/1xFP32_ps_Metal metal
{
source Copyback_1xFP32_ps.metal
shader_reflection_pair_hint Ogre/Compositor/Quad_vs
}
fragment_program Ogre/Copy/1xFP32_ps unified
{
delegate Ogre/Copy/1xFP32_ps_GLSL
delegate Ogre/Copy/1xFP32_ps_VK
delegate Ogre/Copy/1xFP32_ps_HLSL
delegate Ogre/Copy/1xFP32_ps_Metal
}
// Derives from Ogre/Copy/4xFP32 and only replaces the fragment program with the
// 1xFP32 one; the other pass settings are presumably taken from the parent material.
material Ogre/Copy/1xFP32 : Ogre/Copy/4xFP32
{
technique
{
pass
{
fragment_program_ref Ogre/Copy/1xFP32_ps
{
}
}
}
}
fragment_program Ogre/Resolve/1xFP32_Subsample0_ps_GLSL glsl
{
source Resolve_1xFP32_Subsample0_ps.glsl
default_params { param_named tex int 0 }
}
fragment_program Ogre/Resolve/1xFP32_Subsample0_ps_VK glslvk
{
source Resolve_1xFP32_Subsample0_ps.glsl
}
fragment_program Ogre/Resolve/1xFP32_Subsample0_ps_HLSL hlsl
{
source Resolve_1xFP32_Subsample0_ps.hlsl
entry_point main
target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3
}
fragment_program Ogre/Resolve/1xFP32_Subsample0_ps_Metal metal
{
source Resolve_1xFP32_Subsample0_ps.metal
shader_reflection_pair_hint Ogre/Compositor/Quad_vs
}
fragment_program Ogre/Resolve/1xFP32_Subsample0_ps unified
{
delegate Ogre/Resolve/1xFP32_Subsample0_ps_GLSL
delegate Ogre/Resolve/1xFP32_Subsample0_ps_VK
delegate Ogre/Resolve/1xFP32_Subsample0_ps_HLSL
delegate Ogre/Resolve/1xFP32_Subsample0_ps_Metal
}
material Ogre/Resolve/1xFP32_Subsample0
{
technique
{
pass
{
depth_check off
depth_write off
cull_hardware none
vertex_program_ref Ogre/Compositor/Quad_vs
{
}
fragment_program_ref Ogre/Resolve/1xFP32_Subsample0_ps
{
}
texture_unit
{
filtering none
tex_address_mode clamp
}
}
}
}

65
test/data/CommonMaterials/Common/DPM.material

@ -0,0 +1,65 @@
//DPM stands for Dual Paraboloid Mapping.
fragment_program Ogre/DPM/CubeToDpm_4xFP16_ps_GLSL glsl
{
source CubeToDpm_4xFP16_ps.glsl
default_params
{
param_named cubeTexture int 0
}
}
fragment_program Ogre/DPM/CubeToDpm_4xFP16_ps_VK glslvk
{
source CubeToDpm_4xFP16_ps.glsl
}
fragment_program Ogre/DPM/CubeToDpm_4xFP16_ps_HLSL hlsl
{
source CubeToDpm_4xFP16_ps.hlsl
entry_point main
target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3
}
fragment_program Ogre/DPM/CubeToDpm_4xFP16_ps_Metal metal
{
source CubeToDpm_4xFP16_ps.metal
shader_reflection_pair_hint Ogre/Compositor/Quad_vs
}
fragment_program Ogre/DPM/CubeToDpm_4xFP16_ps unified
{
delegate Ogre/DPM/CubeToDpm_4xFP16_ps_GLSL
delegate Ogre/DPM/CubeToDpm_4xFP16_ps_VK
delegate Ogre/DPM/CubeToDpm_4xFP16_ps_HLSL
delegate Ogre/DPM/CubeToDpm_4xFP16_ps_Metal
}
// Converts a cubemap to DPM in the pixel shader.
material Ogre/DPM/CubeToDpm
{
technique
{
pass
{
depth_check off
depth_func always_pass
cull_hardware none
vertex_program_ref Ogre/Compositor/Quad_vs
{
}
fragment_program_ref Ogre/DPM/CubeToDpm_4xFP16_ps
{
}
texture_unit depthTexture
{
filtering bilinear
tex_address_mode clamp
}
}
}
}

134
test/data/CommonMaterials/Common/DPSM.material

@ -0,0 +1,134 @@
//DPSM stands for Dual Paraboloid Shadow Mapping.
fragment_program Ogre/DPSM/CubeToDpsm_ps_GLSL glsl
{
source CubeToDpsm_ps.glsl
default_params
{
param_named depthTexture int 0
}
}
fragment_program Ogre/DPSM/CubeToDpsm_Colour_ps_GLSL glsl : Ogre/DPSM/CubeToDpsm_ps_GLSL
{
preprocessor_defines OUTPUT_TO_COLOUR=1
}
fragment_program Ogre/DPSM/CubeToDpsm_ps_VK glslvk
{
source CubeToDpsm_ps.glsl
}
fragment_program Ogre/DPSM/CubeToDpsm_Colour_ps_VK glslvk : Ogre/DPSM/CubeToDpsm_ps_VK
{
preprocessor_defines OUTPUT_TO_COLOUR=1
}
fragment_program Ogre/DPSM/CubeToDpsm_ps_GLSLES glsles
{
source CubeToDpsm_ps.glsles
default_params
{
param_named depthTexture int 0
}
}
fragment_program Ogre/DPSM/CubeToDpsm_Colour_ps_GLSLES glsles : Ogre/DPSM/CubeToDpsm_ps_GLSLES
{
preprocessor_defines OUTPUT_TO_COLOUR=1
}
fragment_program Ogre/DPSM/CubeToDpsm_ps_HLSL hlsl
{
source CubeToDpsm_ps.hlsl
entry_point main
target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3
}
fragment_program Ogre/DPSM/CubeToDpsm_Colour_ps_HLSL hlsl : Ogre/DPSM/CubeToDpsm_ps_HLSL
{
preprocessor_defines OUTPUT_TO_COLOUR=1
}
fragment_program Ogre/DPSM/CubeToDpsm_ps_Metal metal
{
source CubeToDpsm_ps.metal
shader_reflection_pair_hint Ogre/Compositor/Quad_vs
}
fragment_program Ogre/DPSM/CubeToDpsm_Colour_ps_Metal metal : Ogre/DPSM/CubeToDpsm_ps_Metal
{
preprocessor_defines OUTPUT_TO_COLOUR=1
}
fragment_program Ogre/DPSM/CubeToDpsm_ps unified
{
delegate Ogre/DPSM/CubeToDpsm_ps_GLSL
delegate Ogre/DPSM/CubeToDpsm_ps_GLSLES
delegate Ogre/DPSM/CubeToDpsm_ps_VK
delegate Ogre/DPSM/CubeToDpsm_ps_HLSL
delegate Ogre/DPSM/CubeToDpsm_ps_Metal
}
fragment_program Ogre/DPSM/CubeToDpsm_Colour_ps unified
{
delegate Ogre/DPSM/CubeToDpsm_Colour_ps_GLSL
delegate Ogre/DPSM/CubeToDpsm_Colour_ps_GLSLES
delegate Ogre/DPSM/CubeToDpsm_Colour_ps_VK
delegate Ogre/DPSM/CubeToDpsm_Colour_ps_HLSL
delegate Ogre/DPSM/CubeToDpsm_Colour_ps_Metal
}
// Converts a cubemap to DPSM in the pixel shader.
material Ogre/DPSM/CubeToDpsm
{
technique
{
pass
{
//Depth writes must be on, since we write directly to the depth buffer.
depth_check on
depth_write on
depth_func always_pass
cull_hardware none
vertex_program_ref Ogre/Compositor/Quad_vs
{
}
fragment_program_ref Ogre/DPSM/CubeToDpsm_ps
{
}
texture_unit depthTexture
{
filtering none
tex_address_mode clamp
}
}
}
}
material Ogre/DPSM/CubeToDpsmColour
{
technique
{
pass
{
depth_check off
depth_write off
cull_hardware none
vertex_program_ref Ogre/Compositor/Quad_vs
{
}
fragment_program_ref Ogre/DPSM/CubeToDpsm_Colour_ps
{
}
texture_unit depthTexture
{
filtering none
tex_address_mode clamp
}
}
}
}

67
test/data/CommonMaterials/Common/DepthUtils.material

@ -0,0 +1,67 @@
fragment_program Ogre/Depth/DownscaleMax_ps_GLSL glsl
{
source DepthDownscaleMax_ps.glsl
default_params
{
param_named depthTexture int 0
}
}
fragment_program Ogre/Depth/DownscaleMax_ps_VK glslvk
{
source DepthDownscaleMax_ps.glsl
}
fragment_program Ogre/Depth/DownscaleMax_ps_HLSL hlsl
{
source DepthDownscaleMax_ps.hlsl
entry_point main
target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3
}
fragment_program Ogre/Depth/DownscaleMax_ps_Metal metal
{
source DepthDownscaleMax_ps.metal
shader_reflection_pair_hint Ogre/Compositor/Quad_vs
}
fragment_program Ogre/Depth/DownscaleMax_ps unified
{
delegate Ogre/Depth/DownscaleMax_ps_GLSL
delegate Ogre/Depth/DownscaleMax_ps_VK
delegate Ogre/Depth/DownscaleMax_ps_HLSL
delegate Ogre/Depth/DownscaleMax_ps_Metal
}
// Downscales resolution of input depth texture by half (w/2 x h/2)
// using a max filter (max depth of all 4 neighbours)
material Ogre/Depth/DownscaleMax
{
technique
{
pass
{
depth_check on
depth_write on
depth_func always_pass
cull_hardware none
vertex_program_ref Ogre/Compositor/Quad_vs
{
}
fragment_program_ref Ogre/Depth/DownscaleMax_ps
{
}
texture_unit depthTexture
{
filtering none
tex_address_mode clamp
}
}
}
}

117
test/data/CommonMaterials/Common/EsmGaussianBlurLogFilter.material

@ -0,0 +1,117 @@
// GLSL build of GaussianBlurLogFilter_ps with HORIZONTAL_STEP=1 / VERTICAL_STEP=0.
fragment_program ESM/GaussianLogFilterH_ps_GLSL glsl
{
	source GaussianBlurLogFilter_ps.glsl
	// K used to be listed twice here; it is now defined once, matching the
	// HLSL and Metal declarations of the same program in this file.
	preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=1,VERTICAL_STEP=0
	default_params
	{
		// Bind the 'tex' sampler uniform to texture unit 0.
		param_named tex int 0
	}
}
fragment_program ESM/GaussianLogFilterV_ps_GLSL glsl : ESM/GaussianLogFilterH_ps_GLSL
{
preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=0,VERTICAL_STEP=1
}
// Vulkan (glslvk) build of GaussianBlurLogFilter_ps with HORIZONTAL_STEP=1 / VERTICAL_STEP=0.
fragment_program ESM/GaussianLogFilterH_ps_VK glslvk
{
	source GaussianBlurLogFilter_ps.glsl
	// K used to be listed twice here; it is now defined once, matching the
	// HLSL and Metal declarations of the same program in this file.
	preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=1,VERTICAL_STEP=0
}
fragment_program ESM/GaussianLogFilterV_ps_VK glslvk : ESM/GaussianLogFilterH_ps_VK
{
preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=0,VERTICAL_STEP=1
}
fragment_program ESM/GaussianLogFilterH_ps_HLSL hlsl
{
source GaussianBlurLogFilter_ps.hlsl
entry_point main
target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3
preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=1,VERTICAL_STEP=0
}
fragment_program ESM/GaussianLogFilterV_ps_HLSL hlsl : ESM/GaussianLogFilterH_ps_HLSL
{
preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=0,VERTICAL_STEP=1
}
fragment_program ESM/GaussianLogFilterH_ps_Metal metal
{
source GaussianBlurLogFilter_ps.metal
shader_reflection_pair_hint Ogre/Compositor/Quad_vs
preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=1,VERTICAL_STEP=0
}
fragment_program ESM/GaussianLogFilterV_ps_Metal metal : ESM/GaussianLogFilterH_ps_Metal
{
preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=0,VERTICAL_STEP=1
}
fragment_program ESM/GaussianLogFilterH_ps unified
{
delegate ESM/GaussianLogFilterH_ps_GLSL
delegate ESM/GaussianLogFilterH_ps_VK
delegate ESM/GaussianLogFilterH_ps_HLSL
delegate ESM/GaussianLogFilterH_ps_Metal
default_params
{
param_named weights float9 0.013960189 0.022308320 0.033488754 0.047226712 0.062565230 0.077863686 0.091031872 0.099978946 0.10315263
}
}
fragment_program ESM/GaussianLogFilterV_ps unified
{
delegate ESM/GaussianLogFilterV_ps_GLSL
delegate ESM/GaussianLogFilterV_ps_VK
delegate ESM/GaussianLogFilterV_ps_HLSL
delegate ESM/GaussianLogFilterV_ps_Metal
default_params
{
param_named weights float9 0.013960189 0.022308320 0.033488754 0.047226712 0.062565230 0.077863686 0.091031872 0.099978946 0.10315263
}
}
material ESM/GaussianLogFilterH
{
technique
{
pass
{
depth_check off
depth_write off
cull_hardware none
vertex_program_ref Ogre/Compositor/Quad_vs
{
}
fragment_program_ref ESM/GaussianLogFilterH_ps
{
}
texture_unit tex
{
filtering none
tex_address_mode clamp
}
}
}
}
material ESM/GaussianLogFilterV : ESM/GaussianLogFilterH
{
technique
{
pass
{
fragment_program_ref ESM/GaussianLogFilterV_ps
{
}
}
}
}

101
test/data/CommonMaterials/Common/EsmGaussianBlurLogFilter.material.json

@ -0,0 +1,101 @@
{
"samplers" :
{
"PointClamp" :
{
"min" : "point",
"mag" : "point",
"mip" : "point",
"u" : "clamp",
"v" : "clamp",
"w" : "clamp",
"miplodbias" : 0,
"max_anisotropic" : 1,
"compare_function" : "disabled",
"border" : [1, 1, 1, 1],
"min_lod" : -3.40282347E+38,
"max_lod" : 3.40282347E+38
}
},
"compute" :
{
"ESM/GaussianLogFilterH" :
{
"threads_per_group" : [32, 2, 1],
"thread_groups" : [8, 512, 1],
"source" : "GaussianBlurLogFilterBase_cs",
"pieces" : ["EsmGaussianBlurLogFilter_cs"],
"inform_shader_of_texture_data_change" : true,
"uav_units" : 1,
"textures" :
[
{
"sampler" : "PointClamp"
}
],
"params" :
[
["g_f4OutputSize", "packed_texture_size", 0],
["c_weights[0]", [0.013960189, 0.022308320, 0.033488754, 0.047226712,
0.062565230, 0.077863686, 0.091031872, 0.099978946, 0.10315263]]
],
"params_glsl" :
[
["inputImage", [0], "int"],
["outputImage", [0], "int"]
],
"properties" :
{
"horizontal_pass" : 1,
"kernel_radius" : 8,
"K" : 80
}
},
"ESM/GaussianLogFilterV" :
{
"threads_per_group" : [32, 2, 1],
"thread_groups" : [512, 8, 1],
"source" : "GaussianBlurLogFilterBase_cs",
"pieces" : ["EsmGaussianBlurLogFilter_cs"],
"inform_shader_of_texture_data_change" : true,
"uav_units" : 1,
"textures" :
[
{
"sampler" : "PointClamp"
}
],
"params" :
[
["g_f4OutputSize", "packed_texture_size", 0],
["c_weights[0]", [0.013960189, 0.022308320, 0.033488754, 0.047226712,
0.062565230, 0.077863686, 0.091031872, 0.099978946, 0.10315263]]
],
"params_glsl" :
[
["inputImage", [0], "int"],
["outputImage", [0], "int"]
],
"properties" :
{
"horizontal_pass" : 0,
"kernel_radius" : 8,
"K" : 80
}
}
}
}

18
test/data/CommonMaterials/Common/GLSL/Copyback_1xFP32_ps.glsl

@ -0,0 +1,18 @@
#version ogre_glsl_ver_330
vulkan_layout( ogre_t0 ) uniform texture2D tex;
vulkan( layout( ogre_s0 ) uniform sampler texSampler );
vulkan_layout( location = 0 )
in block
{
vec2 uv0;
} inPs;
vulkan_layout( location = 0 )
out float fragColour;
// Writes the first channel of 'tex', sampled at the interpolated UV, to the float output.
void main()
{
	vec4 texel = texture( vkSampler2D( tex, texSampler ), inPs.uv0 );
	fragColour = texel.x;
}

22
test/data/CommonMaterials/Common/GLSL/Copyback_4xFP32_2DArray_ps.glsl

@ -0,0 +1,22 @@
#version ogre_glsl_ver_330
vulkan_layout( ogre_t0 ) uniform texture2DArray tex;
vulkan( layout( ogre_s0 ) uniform sampler texSampler );
vulkan( layout( ogre_P0 ) uniform Params { )
uniform float sliceIdx;
vulkan( }; )
vulkan_layout( location = 0 )
in block
{
vec2 uv0;
} inPs;
vulkan_layout( location = 0 )
out vec4 fragColour;
// Samples the array texture 'tex' at the interpolated UV, using sliceIdx as the layer coordinate.
void main()
{
	vec3 uvAndSlice = vec3( inPs.uv0, sliceIdx );
	fragColour = texture( vkSampler2DArray( tex, texSampler ), uvAndSlice );
}

18
test/data/CommonMaterials/Common/GLSL/Copyback_4xFP32_ps.glsl

@ -0,0 +1,18 @@
#version ogre_glsl_ver_330
vulkan_layout( ogre_t0 ) uniform texture2D tex;
vulkan( layout( ogre_s0 ) uniform sampler texSampler );
vulkan_layout( location = 0 )
in block
{
vec2 uv0;
} inPs;
vulkan_layout( location = 0 )
out vec4 fragColour;
// Writes all four channels of 'tex', sampled at the interpolated UV, to the output.
void main()
{
	vec4 texel = texture( vkSampler2D( tex, texSampler ), inPs.uv0 );
	fragColour = texel;
}

28
test/data/CommonMaterials/Common/GLSL/CubeToDpm_4xFP16_ps.glsl

@ -0,0 +1,28 @@
#version ogre_glsl_ver_330
vulkan( layout( ogre_P0 ) uniform Params { )
uniform float lodLevel;
vulkan( }; )
vulkan_layout( ogre_t0 ) uniform textureCube cubeTexture;
vulkan( layout( ogre_s0 ) uniform sampler cubeSampler );
vulkan_layout( location = 0 )
in block
{
vec2 uv0;
} inPs;
vulkan_layout( location = 0 )
out vec4 fragColour;
// Builds a cubemap lookup direction from the quad UV and samples cubeTexture at lodLevel.
void main()
{
	// uv0.x below 0.5 and uv0.x from 0.5 upwards are each stretched by the same formula;
	// the two halves differ only in the sign of z (see below).
	bool firstHalf = inPs.uv0.x < 0.5;

	float dirX = mod( inPs.uv0.x, 0.5 ) * 4.0 - 1.0;
	float dirY = inPs.uv0.y * 2.0 - 1.0;

	// Paraboloid: z = 0.5 - 0.5 * (x^2 + y^2), negated for the second half.
	float dirZ = 0.5 - 0.5 * (dirX * dirX + dirY * dirY);
	dirZ = firstHalf ? dirZ : -dirZ;

	vec3 cubeDir = vec3( dirX, dirY, dirZ );
	fragColour = textureLod( vkSamplerCube( cubeTexture, cubeSampler ), cubeDir, lodLevel );
}

37
test/data/CommonMaterials/Common/GLSL/CubeToDpsm_ps.glsl

@ -0,0 +1,37 @@
#version ogre_glsl_ver_330
// Interface of the cubemap depth conversion fragment shader: main() fetches a
// depth value from `depthTexture` and writes it either to fragColour or to
// gl_FragDepth, depending on OUTPUT_TO_COLOUR.

// Source depth cubemap and the sampler main() combines with it.
vulkan_layout( ogre_t0 ) uniform textureCube depthTexture;
vulkan( layout( ogre_s0 ) uniform sampler cubeSampler );
// Input interface block carrying the uv0 texture coordinates.
vulkan_layout( location = 0 )
in block
{
vec2 uv0;
} inPs;
// Redeclared built-in; main() in this file does not read it.
in vec4 gl_FragCoord;
//out float gl_FragDepth;
// The colour output only exists when OUTPUT_TO_COLOUR is defined; otherwise
// main() writes gl_FragDepth instead.
#ifdef OUTPUT_TO_COLOUR
vulkan_layout( location = 0 )
out float fragColour;
#endif
// Builds a cubemap lookup direction from uv0 (the half with u >= 0.5 looks
// along the negated z direction), reads the depth stored at mip 0 and emits it
// through the colour output or gl_FragDepth.
void main()
{
    vec2 uv = inPs.uv0;

    float dirX = mod( uv.x, 0.5 ) * 4.0 - 1.0;
    float dirY = uv.y * 2.0 - 1.0;
    float dirZ = 0.5 - 0.5 * (dirX * dirX + dirY * dirY);

    // Everything that is not strictly in the first half flips z.
    if( !(uv.x < 0.5) )
        dirZ = -dirZ;

    vec4 cubeTexel = textureLod( vkSamplerCube( depthTexture, cubeSampler ), vec3( dirX, dirY, dirZ ), 0 );

#ifdef OUTPUT_TO_COLOUR
    fragColour = cubeTexel.x;
#else
    gl_FragDepth = cubeTexel.x;
#endif
}

17
test/data/CommonMaterials/Common/GLSL/DepthDownscaleMax_ps.glsl

@ -0,0 +1,17 @@
#version ogre_glsl_ver_330
// Interface of the depth downscale fragment shader: main() reads a 2x2 block of
// `depthTexture` with texelFetch (no sampler is declared) and writes the
// largest value to gl_FragDepth.
vulkan_layout( ogre_t0 ) uniform texture2D depthTexture;
// Redeclared built-in; main() uses it to locate the 2x2 source block.
in vec4 gl_FragCoord;
//out float gl_FragDepth;
// Halves the resolution of a depth buffer: every output pixel takes the
// maximum of the 2x2 block of source texels (mip 0) it covers.
void main()
{
    // Top-left texel of the 2x2 source block covered by this fragment.
    ivec2 blockOrigin = ivec2( gl_FragCoord.xy * 2.0 );

    float depth00 = texelFetch( depthTexture, blockOrigin, 0 ).x;
    float depth01 = texelFetch( depthTexture, blockOrigin + ivec2( 0, 1 ), 0 ).x;
    float depth10 = texelFetch( depthTexture, blockOrigin + ivec2( 1, 0 ), 0 ).x;
    float depth11 = texelFetch( depthTexture, blockOrigin + ivec2( 1, 1 ), 0 ).x;

    float columnMax0 = max( depth00, depth01 );
    float columnMax1 = max( depth10, depth11 );
    gl_FragDepth = max( columnMax0, columnMax1 );
}

22
test/data/CommonMaterials/Common/GLSL/EsmGaussianBlurLogFilter_cs.glsl

@ -0,0 +1,22 @@
//Based on GPUOpen's samples SeparableFilter11
//https://github.com/GPUOpen-LibrariesAndSDKs/SeparableFilter11
//For better understanding, read "Efficient Compute Shader Programming" from Bill Bilodeau
//http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/Efficient%20Compute%20Shader%20Programming.pps
//TL;DR:
// * Each thread works on 4 pixels at a time (for VLIW hardware, i.e. Radeon HD 5000 & 6000 series).
// * 256 pixels per threadgroup. Each threadgroup works on 2 rows of 128 pixels each.
// That means 32x2 threads = 64. 64 threads x 4 pixels per thread = 256
//
//This file only defines template pieces. The piece names match the ones the
//GaussianBlur*Base_cs templates insert (data_type, lds_data_type, lds_definition,
//image_sample, image_store). Everything here works on a single float channel.
//Keep comments outside the piece bodies: a piece body is pasted verbatim
//wherever the piece is inserted.
//
//Type of the filtered value and of the value cached in shared memory.
@piece( data_type )float@end
@piece( lds_data_type )float@end
//Shared-memory cache: 2 rows of samples_per_threadgroup samples each.
@piece( lds_definition )shared float g_f3LDS[ 2 ] [ @value( samples_per_threadgroup ) ];@end
//Body of the sampling helper in the base template: reads the first channel of
//mip 0 at f2SamplePosition (a local declared by the helper that inserts this piece).
@piece( image_sample )
return textureLod( vkSampler2D( inputImage, inputSampler ), f2SamplePosition, 0 ).x;
@end
//Writes the 4 filtered values as ( value, 0, 0, 1 ) to consecutive pixels
//starting at i2Center and stepping by i2Inc.
//NOTE(review): this reads outColour[ 0..3 ], but the base templates declare
//outColour[ 2 ] when downscale_lq is set - confirm the two are never combined.
@piece( image_store )
@foreach( 4, iPixel )
imageStore( outputImage, ivec2( i2Center + @iPixel * i2Inc ), vec4( outColour[ @iPixel ], 0, 0, 1.0 ) );@end
@end

263
test/data/CommonMaterials/Common/GLSL/GaussianBlurBase_cs.glsl

@ -0,0 +1,263 @@
@property( syntax != glslvk )
#version 430
@else
#version 450
@end
// Separable blur compute shader template. One dispatch filters along a single
// axis; the axis is selected by the horizontal_pass property.
// NOTE: the template parser also evaluates its commands inside comments, so
// never spell a template command with its leading marker in a comment here.
//
//Based on GPUOpen's samples SeparableFilter11
//https://github.com/GPUOpen-LibrariesAndSDKs/SeparableFilter11
//For better understanding, read "Efficient Compute Shader Programming" from Bill Bilodeau
//http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/Efficient%20Compute%20Shader%20Programming.pps
//TL;DR:
// * Each thread works on 4 pixels at a time (for VLIW hardware, i.e. Radeon HD 5000 & 6000 series).
// * 256 pixels per threadgroup. Each threadgroup works on 2 rows of 128 pixels each.
// That means 32x2 threads = 64. 64 threads x 4 pixels per thread = 256
// For this shader to work, several pieces need to be defined:
// data_type (e.g. vec3)
// lds_data_type (e.g. vec3, uint)
// lds_definition
// image_store
// image_sample
// decode_lds (optional, i.e. when lds_data_type != data_type)
// extra_params (inserted inside the Params block below; appears to be optional,
// the ESM piece file does not define it - confirm)
// Define the property "downscale" if you're doing a downsample.
// Define "downscale_lq" (must also define downscale) for SLIGHTLY lower quality downscale
// The script uses the template syntax to automatically set the num. of threadgroups
// based on the bound input texture.

// Input texture + sampler. The texture type follows the texture bound to slot 0.
vulkan( layout( ogre_s0 ) uniform sampler inputSampler );
@property( texture0_texture_type == TextureTypes_Type2DArray )
vulkan_layout( ogre_t0 ) uniform texture2DArray inputImage;
@else
vulkan_layout( ogre_t0 ) uniform texture2D inputImage;
@end
// Write-only output image. Its format qualifier comes from the uav0_pf_type
// piece and its type follows the UAV bound to slot 0.
layout( vulkan( ogre_u0 ) vk_comma @insertpiece(uav0_pf_type) )
@property( uav0_texture_type == TextureTypes_Type2DArray )
uniform restrict writeonly image2DArray outputImage;
@else
uniform restrict writeonly image2D outputImage;
@end
// 32 = 128 / 4
layout( local_size_x = 32,
local_size_y = 2,
local_size_z = 1 ) in;
// Template properties holding the same values as the local_size_* layout above.
@pset( threads_per_group_x, 32 )
@pset( threads_per_group_y, 2 )
@pset( threads_per_group_z, 1 )
// pixelsPerRow = 32 threads * 4 pixels per thread = 128; rowsPerThreadGroup = 2.
@pmul( pixelsPerRow, threads_per_group_x, 4 )
@pset( rowsPerThreadGroup, threads_per_group_y )
@pset( num_thread_groups_z, 1 )
// The dispatch size is derived from the size of the UAV in slot 0.
@set( input_width, uav0_width_with_lod )
@set( input_height, uav0_height_with_lod )
@property( horizontal_pass )
// When downscaling, the width used for the dispatch is doubled.
@property( downscale ) @mul( input_width, 2 ) @end
/// Calculate num_thread_groups_
/// num_thread_groups_x = (input_width + pixelsPerRow - 1) / pixelsPerRow
/// num_thread_groups_y = (input_height + rowsPerThreadGroup - 1) / rowsPerThreadGroup
@add( num_thread_groups_x, input_width, pixelsPerRow )
@sub( num_thread_groups_x, 1 )
@div( num_thread_groups_x, pixelsPerRow )
@add( num_thread_groups_y, input_height, rowsPerThreadGroup )
@sub( num_thread_groups_y, 1 )
@div( num_thread_groups_y, rowsPerThreadGroup )
@end @property( !horizontal_pass )
// When downscaling, the height used for the dispatch is doubled.
@property( downscale ) @mul( input_height, 2 ) @end
/// Calculate num_thread_groups_
/// num_thread_groups_x = (input_width + rowsPerThreadGroup - 1) / rowsPerThreadGroup
/// num_thread_groups_y = (input_height + pixelsPerRow - 1) / pixelsPerRow
@add( num_thread_groups_x, input_width, rowsPerThreadGroup )
@sub( num_thread_groups_x, 1 )
@div( num_thread_groups_x, rowsPerThreadGroup )
@add( num_thread_groups_y, input_height, pixelsPerRow )
@sub( num_thread_groups_y, 1 )
@div( num_thread_groups_y, pixelsPerRow )
@end
/// shared vec3 g_f3LDS[ 2 ] [ @value( samples_per_threadgroup ) ];
@insertpiece( lds_definition )
// Weights are packed four per vec4: weight x is component (x & 3) of c_weights[ x >> 2 ].
#define C_WEIGHTS( x ) c_weights[(x) >> 2u][(x) & 3u]
// weights_array_count = ( kernel_radius + 1u + 3u ) / 4u
@padd( weights_array_count, kernel_radius, 4 )
@pdiv( weights_array_count, 4 )
// Shader parameters. main() compares pixel coordinates against g_f4OutputSize.xy
// and sampleTex() multiplies texel positions by g_f4OutputSize.zw.
vulkan( layout( ogre_P0 ) uniform Params { )
uniform vec4 g_f4OutputSize;
uniform vec4 c_weights[@value( weights_array_count )];
@insertpiece( extra_params )
vulkan( }; )
// Fetches the input sample for integer pixel position i2Position.
// The position is moved to the texel centre (+0.5) and multiplied by
// g_f4OutputSize.zw; the actual fetch and the return statement come from the
// image_sample piece, which refers to the local f2SamplePosition by name (do
// not rename it). The return type is the lds_data_type piece.
// f2Offset is not used by the code visible here; an image_sample piece could
// still reference it - confirm before removing.
@insertpiece( lds_data_type ) sampleTex( ivec2 i2Position , vec2 f2Offset )
{
vec2 f2SamplePosition = vec2( i2Position ) + vec2( 0.5f, 0.5f );
f2SamplePosition *= g_f4OutputSize.zw;
///return textureLod( inputImage, f2SamplePosition, 0 ).xyz;
@insertpiece( image_sample )
}
// Filters 4 consecutive pixels (2 when downscale_lq is set) along the pass axis
// from the samples cached in g_f3LDS, then runs the image_store piece.
//   iPixelOffset  index in g_f3LDS[ iLineOffset ] of this thread's first tap
//   iLineOffset   which cached row to read
//   i2Center      not used directly here; available to the image_store piece
//   i2Inc         not used directly here; available to the image_store piece
// RDI is a 4-wide sliding window over the cached row: after every tap it is
// shifted down by one element and refilled with the next cached sample.
// The template loops below are unrolled by the template parser.
void ComputeFilterKernel( int iPixelOffset, int iLineOffset, ivec2 i2Center, ivec2 i2Inc )
{
@property( !downscale_lq )
@insertpiece( data_type ) outColour[ 4 ];
@end @property( downscale_lq )
@insertpiece( data_type ) outColour[ 2 ];
@end
@insertpiece( data_type ) RDI[ 4 ] ;
// Centre tap: samples at offset kernel_radius, weighted with C_WEIGHTS( kernel_radius ).
@foreach( 4, iPixel )
RDI[ @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @value( kernel_radius ) + @iPixel ] );@end
@property( !downscale_lq )
@foreach( 4, iPixel )
outColour[ @iPixel ] = RDI[ @iPixel ] * C_WEIGHTS( @value( kernel_radius ) );@end
@end @property( downscale_lq )
@foreach( 2, iPixel )
outColour[ @iPixel ] = RDI[ @iPixel * 2 ] * C_WEIGHTS( @value( kernel_radius ) );@end
@end
// Restart the window at the first (left-most) tap.
@foreach( 4, iPixel )
RDI[ @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @iPixel ] );@end
// From here on iPixelOffset points at the sample that enters the window next.
iPixelOffset += 4;
/// Deal with taps to our left.
/// for ( iIteration = 0; iIteration < radius; iIteration += 1 )
@foreach( kernel_radius, iIteration )
@property( !downscale_lq )
@foreach( 4, iPixel )
outColour[ @iPixel ] += RDI[ @iPixel ] * C_WEIGHTS( @iIteration );@end
@end @property( downscale_lq )
@foreach( 2, iPixel )
outColour[ @iPixel ] += RDI[ @iPixel * 2 ] * C_WEIGHTS( @iIteration );@end
@end
@foreach( 3, iPixel )
RDI[ @iPixel ] = RDI[ @iPixel + ( 1 ) ];@end
@foreach( 1, iPixel )
RDI[ 4 - 1 + @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @iIteration + @iPixel ] );@end
@end
// Reload the window one sample past the centre tap for the right-hand side
// (iPixelOffset - 4 undoes the increment above).
@foreach( 4, iPixel )
RDI[ @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset - 4 + @value( kernel_radius ) + 1 + @iPixel ] );@end
// kernel_radius_plus1 = radius + 1, kernel_radius2x_plus1 = 2 * radius + 1,
// kernel_radius2x = 2 * radius. main() also reads kernel_radius2x_plus1.
@padd( kernel_radius_plus1, kernel_radius, 1 )
@pmul( kernel_radius2x_plus1, kernel_radius, 2 )
@padd( kernel_radius2x_plus1, 1 )
@pmul( kernel_radius2x, kernel_radius, 2 )
/// Deal with taps to our right.
/// for ( iIteration = radius + 1; iIteration < ( radius * 2 + 1 ); iIteration += 1 )
// The weight index is mirrored ( 2 * radius - iIteration ), so the right side
// reuses the weights of the left side.
@foreach( kernel_radius2x_plus1, iIteration, kernel_radius_plus1 )
@property( !downscale_lq )
@foreach( 4, iPixel )
outColour[ @iPixel ] += RDI[ @iPixel ] * C_WEIGHTS( @value( kernel_radius2x ) - @iIteration );@end
@end @property( downscale_lq )
@foreach( 2, iPixel )
outColour[ @iPixel ] += RDI[ @iPixel * 2 ] * C_WEIGHTS( @value( kernel_radius2x ) - @iIteration );@end
@end
@foreach( 3, iPixel )
RDI[ @iPixel ] = RDI[ @iPixel + ( 1 ) ];@end
@foreach( 1, iPixel )
RDI[ 4 - 1 + @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @iIteration + @iPixel ] );@end
@end
// Example of what an image_store piece looks like (template markers removed
// on purpose so the parser ignores it):
/*
foreach( 4, iPixel )
imageStore( outputImage, ivec2( i2Center + iPixel * i2Inc ), vec4( outColour[ iPixel ], 1.0 ) );end
*/
@insertpiece( image_store )
}
// Entry point. Every thread first copies its share of input samples into
// g_f3LDS, the group synchronizes, and then every thread whose first pixel lies
// inside the output filters its pixels with ComputeFilterKernel().
// The two branches are identical except for which axis is the filter axis.
void main()
{
/// samples_per_threadgroup = 128 + ( ( kernel_radius * 2 + 1 ) - 1 )
/// samples_per_thread = ( 128 + ( ( kernel_radius * 2 + 1 ) - 1 ) ) / ( 128 / 4 )
@padd( samples_per_threadgroup, 127, kernel_radius2x_plus1 )
@pdiv( samples_per_thread, samples_per_threadgroup, 32 )
@property( horizontal_pass )
int iSampleOffset = int( gl_LocalInvocationID.x * @value( samples_per_thread ) );
int iLineOffset = int( gl_LocalInvocationID.y );
// A group covers 128 pixels in x ( << 7 ) and 2 rows in y ( << 1 ). The start is
// moved back by kernel_radius so the left-hand taps are cached too. The
// subtraction is done on unsigned values and converted by the ivec2 constructor.
ivec2 i2GroupCoord = ivec2( ( gl_WorkGroupID.x << 7u ) - @value( kernel_radius )u, gl_WorkGroupID.y << 1u );
ivec2 i2Coord = ivec2( i2GroupCoord.x + iSampleOffset, i2GroupCoord.y );
// Each thread caches samples_per_thread consecutive samples of its row.
@foreach( samples_per_thread, i )
g_f3LDS[ iLineOffset ][ iSampleOffset + @i ] = sampleTex( i2Coord + ivec2( @i, gl_LocalInvocationID.y ) , vec2( 0.5f, 0.0f ) );@end
// samples_per_threadgroup is not necessarily a multiple of 32: the first
// threads additionally cache the leftover samples, counted from the end of the row.
if( gl_LocalInvocationID.x < @value( samples_per_threadgroup )u - 32u * @value( samples_per_thread )u )
{
g_f3LDS[ iLineOffset ][ @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x ] =
sampleTex( i2GroupCoord + ivec2( @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x, gl_LocalInvocationID.y ), vec2( 0.5f, 0.0f ) );
}
//memoryBarrierShared ensures our write is visible to everyone else (must be done BEFORE the barrier)
//barrier ensures every thread's execution reached here.
memoryBarrierShared();
barrier();
int iPixelOffset = int( gl_LocalInvocationID.x << 2u ); //gl_LocalInvocationID.x * 4u
i2Coord = ivec2( i2GroupCoord.x + iPixelOffset, i2GroupCoord.y );
// Undo the kernel_radius shift: i2Coord.x is now this thread's first pixel.
i2Coord.x += @value( kernel_radius );
// Skip threads whose first pixel is at or beyond g_f4OutputSize.x.
if( i2Coord.x < int(g_f4OutputSize.x) )
{
ivec2 i2Center = i2Coord + ivec2( 0, gl_LocalInvocationID.y );
ivec2 i2Inc = ivec2 ( 1, 0 );
@property( downscale )
// Downscale: the coordinate along the filter axis is halved.
i2Center.x = int( uint( i2Center.x ) >> 1u );
@end
ComputeFilterKernel( iPixelOffset, iLineOffset, i2Center, i2Inc );
}
@end @property( !horizontal_pass )
int iSampleOffset = int( gl_LocalInvocationID.x * @value( samples_per_thread ) );
int iLineOffset = int( gl_LocalInvocationID.y );
// Vertical pass: a group covers 2 columns in x ( << 1 ) and 128 pixels in
// y ( << 7 ), moved back by kernel_radius along y.
ivec2 i2GroupCoord = ivec2( gl_WorkGroupID.x << 1u, ( gl_WorkGroupID.y << 7u ) - @value( kernel_radius )u );
ivec2 i2Coord = ivec2( i2GroupCoord.x, i2GroupCoord.y + iSampleOffset );
// Each thread caches samples_per_thread consecutive samples of its column.
@foreach( samples_per_thread, i )
g_f3LDS[ iLineOffset ][ iSampleOffset + @i ] = sampleTex( i2Coord + ivec2( gl_LocalInvocationID.y, @i ) , vec2( 0.0f, 0.5f ) );@end
// The first threads additionally cache the leftover samples (see the
// horizontal branch).
if( gl_LocalInvocationID.x < @value( samples_per_threadgroup )u - 32u * @value( samples_per_thread )u )
{
g_f3LDS[ iLineOffset ][ @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x ] =
sampleTex( i2GroupCoord + ivec2( gl_LocalInvocationID.y, @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x ), vec2( 0.0f, 0.5f ) );
}
//memoryBarrierShared ensures our write is visible to everyone else (must be done BEFORE the barrier)
//barrier ensures every thread's execution reached here.
memoryBarrierShared();
barrier();
int iPixelOffset = int( gl_LocalInvocationID.x << 2u ); //gl_LocalInvocationID.x * 4u
i2Coord = ivec2( i2GroupCoord.x, i2GroupCoord.y + iPixelOffset );
// Undo the kernel_radius shift: i2Coord.y is now this thread's first pixel.
i2Coord.y += @value( kernel_radius );
// Skip threads whose first pixel is at or beyond g_f4OutputSize.y.
if( i2Coord.y < int(g_f4OutputSize.y) )
{
ivec2 i2Center = i2Coord + ivec2( gl_LocalInvocationID.y, 0 );
ivec2 i2Inc = ivec2 ( 0, 1 );
@property( downscale )
// Downscale: the coordinate along the filter axis is halved.
i2Center.y = int( uint( i2Center.y ) >> 1u );
@end
ComputeFilterKernel( iPixelOffset, iLineOffset, i2Center, i2Inc );
}
@end
}

285
test/data/CommonMaterials/Common/GLSL/GaussianBlurLogFilterBase_cs.glsl

@ -0,0 +1,285 @@
@property( syntax != glslvk )
#version 430
@else
#version 450
@end
// NOTE: the template parser also evaluates its commands inside comments, so
// never spell a template command with its leading marker in a comment here.
//
//See GaussianBlurBase_cs for the original.
//This is a derived version which is used for filtering ESM (Exponential Shadow Maps).
//Normally ESM is in exponential space: exp( K * linearSpaceDepth );
//Filtering should be done in that space.
//However because of precision reasons, we store linearSpaceDepth instead. In order to perform
//correct filtering, we use the following formula:
// exp( filteredDepth ) = w0 * exp( d0 ) + w1 * exp( d1 ) + w2 * exp( d2 ) + ...
//
//But this is not precision friendly. So we do instead:
// = w0 * exp( d0 ) + w1 * exp( d1 ) + w2 * exp( d2 )
// = exp( d0 ) * ( w0 + w1 * exp( d1 ) / exp( d0 ) + w2 * exp( d2 ) / exp( d0 ) )
// = exp( d0 ) * ( w0 + w1 * exp( d1 - d0 ) + w2 * exp( d2 - d0 ) )
// = exp( d0 ) * exp( log( w0 + w1 * exp( d1 - d0 ) + w2 * exp( d2 - d0 ) ) )
// = exp( d0 + log( w0 + w1 * exp( d1 - d0 ) + w2 * exp( d2 - d0 ) ) )
// exp( filteredDepth ) = exp( d0 + log( w0 + w1 * exp( d1 - d0 ) + w2 * exp( d2 - d0 ) ) )
//Almost final formula:
// filteredDepth = d0 + log( w0 + w1 * exp( d1 - d0 ) + w2 * exp( d2 - d0 ) )
//
//The formula is actually:
// exp( K * filteredDepth ) = w0 * exp( K * d0 ) + w1 * exp( K * d1 ) + w2 * exp( K * d2 ) + ...
//Final formula:
// filteredDepth = d0 + log( w0 + w1 * exp( K * (d1 - d0) ) + w2 * exp( K * (d2 - d0) ) ) / K
//Like in the original filter:
// * Each thread works on 4 pixels at a time (for VLIW hardware, i.e. Radeon HD 5000 & 6000 series).
// * 256 pixels per threadgroup. Each threadgroup works on 2 rows of 128 pixels each.
// That means 32x2 threads = 64. 64 threads x 4 pixels per thread = 256
// For this shader to work, several pieces need to be defined:
// data_type (e.g. vec3)
// lds_data_type (e.g. vec3, uint)
// lds_definition
// image_store
// image_sample
// decode_lds (optional, i.e. when lds_data_type != data_type)
// extra_params (inserted inside the Params block below; appears to be optional,
// the ESM piece file does not define it - confirm)
// The property K (the ESM exponent in the formula above) must be set as well.
// Define the property "downscale" if you're doing a downsample.
// Define "downscale_lq" (must also define downscale) for SLIGHTLY lower quality downscale
// The script uses the template syntax to automatically set the num. of threadgroups
// based on the bound input texture.

// Input texture + sampler, and the write-only output image whose format
// qualifier comes from the uav0_pf_type piece.
vulkan( layout( ogre_s0 ) uniform sampler inputSampler );
vulkan_layout( ogre_t0 ) uniform texture2D inputImage;
layout( vulkan( ogre_u0 ) vk_comma @insertpiece(uav0_pf_type) )
uniform restrict writeonly image2D outputImage;
// 32 = 128 / 4
layout( local_size_x = 32,
local_size_y = 2,
local_size_z = 1 ) in;
// Template properties holding the same values as the local_size_* layout above.
@pset( threads_per_group_x, 32 )
@pset( threads_per_group_y, 2 )
@pset( threads_per_group_z, 1 )
// pixelsPerRow = 32 threads * 4 pixels per thread = 128; rowsPerThreadGroup = 2.
@pmul( pixelsPerRow, threads_per_group_x, 4 )
@pset( rowsPerThreadGroup, threads_per_group_y )
@pset( num_thread_groups_z, 1 )
// The dispatch size is derived from the size of the UAV in slot 0.
@set( input_width, uav0_width_with_lod )
@set( input_height, uav0_height_with_lod )
@property( horizontal_pass )
// When downscaling, the width used for the dispatch is doubled.
@property( downscale ) @mul( input_width, 2 ) @end
/// Calculate num_thread_groups_
/// num_thread_groups_x = (input_width + pixelsPerRow - 1) / pixelsPerRow
/// num_thread_groups_y = (input_height + rowsPerThreadGroup - 1) / rowsPerThreadGroup
@add( num_thread_groups_x, input_width, pixelsPerRow )
@sub( num_thread_groups_x, 1 )
@div( num_thread_groups_x, pixelsPerRow )
@add( num_thread_groups_y, input_height, rowsPerThreadGroup )
@sub( num_thread_groups_y, 1 )
@div( num_thread_groups_y, rowsPerThreadGroup )
@end @property( !horizontal_pass )
// When downscaling, the height used for the dispatch is doubled.
@property( downscale ) @mul( input_height, 2 ) @end
/// Calculate num_thread_groups_
/// num_thread_groups_x = (input_width + rowsPerThreadGroup - 1) / rowsPerThreadGroup
/// num_thread_groups_y = (input_height + pixelsPerRow - 1) / pixelsPerRow
@add( num_thread_groups_x, input_width, rowsPerThreadGroup )
@sub( num_thread_groups_x, 1 )
@div( num_thread_groups_x, rowsPerThreadGroup )
@add( num_thread_groups_y, input_height, pixelsPerRow )
@sub( num_thread_groups_y, 1 )
@div( num_thread_groups_y, pixelsPerRow )
@end
/// shared vec3 g_f3LDS[ 2 ] [ @value( samples_per_threadgroup ) ];
@insertpiece( lds_definition )
// Weights are packed four per vec4: weight x is component (x & 3) of c_weights[ x >> 2 ].
#define C_WEIGHTS( x ) c_weights[(x) >> 2u][(x) & 3u]
// weights_array_count = ( kernel_radius + 1u + 3u ) / 4u
@padd( weights_array_count, kernel_radius, 4 )
@pdiv( weights_array_count, 4 )
// Shader parameters. main() compares pixel coordinates against g_f4OutputSize.xy
// and sampleTex() multiplies texel positions by g_f4OutputSize.zw.
vulkan( layout( ogre_P0 ) uniform Params { )
uniform vec4 g_f4OutputSize;
uniform vec4 c_weights[@value( weights_array_count )];
@insertpiece( extra_params )
vulkan( }; )
// Fetches the input sample for integer pixel position i2Position.
// The position is moved to the texel centre (+0.5) and multiplied by
// g_f4OutputSize.zw; the actual fetch and the return statement come from the
// image_sample piece, which refers to the local f2SamplePosition by name (do
// not rename it). The return type is the lds_data_type piece.
// f2Offset is not used by the code visible here; an image_sample piece could
// still reference it - confirm before removing.
@insertpiece( lds_data_type ) sampleTex( ivec2 i2Position , vec2 f2Offset )
{
vec2 f2SamplePosition = vec2( i2Position ) + vec2( 0.5f, 0.5f );
f2SamplePosition *= g_f4OutputSize.zw;
///return textureLod( inputImage, f2SamplePosition, 0 ).xyz;
@insertpiece( image_sample )
}
// Log-space variant of the blur kernel. It filters 4 consecutive pixels (2 when
// downscale_lq is set) along the pass axis from the samples cached in g_f3LDS
// and then runs the image_store piece. Per pixel it evaluates
//   result = d0 + log( w0 + sum( wi * exp( K * (di - d0) ) ) ) / K
// where d0 (firstSmpl) is the centre sample and w0 its weight.
//   iPixelOffset  index in g_f3LDS[ iLineOffset ] of this thread's first tap
//   iLineOffset   which cached row to read
//   i2Center      not used directly here; available to the image_store piece
//   i2Inc         not used directly here; available to the image_store piece
// RDI is a 4-wide sliding window over the cached row: after every tap it is
// shifted down by one element and refilled with the next cached sample.
// The template loops below are unrolled by the template parser.
void ComputeFilterKernel( int iPixelOffset, int iLineOffset, ivec2 i2Center, ivec2 i2Inc )
{
@property( !downscale_lq )
@insertpiece( data_type ) outColour[ 4 ];
@insertpiece( data_type ) firstSmpl[ 4 ];
@end @property( downscale_lq )
@insertpiece( data_type ) outColour[ 2 ];
// Only firstSmpl[ 0 ] and firstSmpl[ 1 ] are written and read in this configuration.
@insertpiece( data_type ) firstSmpl[ 4 ];
@end
@insertpiece( data_type ) RDI[ 4 ] ;
// Centre tap: remember the centre sample as d0 and start the sum with its
// weight w0 (the exp() factor of the centre tap is exp( 0 ) = 1).
@foreach( 4, iPixel )
RDI[ @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @value( kernel_radius ) + @iPixel ] );@end
@property( !downscale_lq )
@foreach( 4, iPixel )
firstSmpl[ @iPixel ].x = RDI[ @iPixel ];
outColour[ @iPixel ].x = C_WEIGHTS( @value( kernel_radius ) );@end
@end @property( downscale_lq )
@foreach( 2, iPixel )
firstSmpl[ @iPixel ].x = RDI[ @iPixel * 2 ];
outColour[ @iPixel ].x = C_WEIGHTS( @value( kernel_radius ) );@end
@end
// Restart the window at the first (left-most) tap.
@foreach( 4, iPixel )
RDI[ @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @iPixel ] );@end
// From here on iPixelOffset points at the sample that enters the window next.
iPixelOffset += 4;
/// Deal with taps to our left.
/// for ( iIteration = 0; iIteration < radius; iIteration += 1 )
@foreach( kernel_radius, iIteration )
@property( !downscale_lq )
@foreach( 4, iPixel )
outColour[ @iPixel ].x += exp(@value(K)*(RDI[ @iPixel ] - firstSmpl[ @iPixel ].x)) * C_WEIGHTS( @iIteration );@end
@end @property( downscale_lq )
@foreach( 2, iPixel )
outColour[ @iPixel ].x += exp(@value(K)*(RDI[ @iPixel * 2 ] - firstSmpl[ @iPixel ].x)) * C_WEIGHTS( @iIteration );@end
@end
@foreach( 3, iPixel )
RDI[ @iPixel ] = RDI[ @iPixel + ( 1 ) ];@end
@foreach( 1, iPixel )
RDI[ 4 - 1 + @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @iIteration + @iPixel ] );@end
@end
// Reload the window one sample past the centre tap for the right-hand side
// (iPixelOffset - 4 undoes the increment above).
@foreach( 4, iPixel )
RDI[ @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset - 4 + @value( kernel_radius ) + 1 + @iPixel ] );@end
// kernel_radius_plus1 = radius + 1, kernel_radius2x_plus1 = 2 * radius + 1,
// kernel_radius2x = 2 * radius. main() also reads kernel_radius2x_plus1.
@padd( kernel_radius_plus1, kernel_radius, 1 )
@pmul( kernel_radius2x_plus1, kernel_radius, 2 )
@padd( kernel_radius2x_plus1, 1 )
@pmul( kernel_radius2x, kernel_radius, 2 )
/// Deal with taps to our right.
/// for ( iIteration = radius + 1; iIteration < ( radius * 2 + 1 ); iIteration += 1 )
// The weight index is mirrored ( 2 * radius - iIteration ), so the right side
// reuses the weights of the left side.
@foreach( kernel_radius2x_plus1, iIteration, kernel_radius_plus1 )
@property( !downscale_lq )
@foreach( 4, iPixel )
outColour[ @iPixel ].x += exp(@value(K)*(RDI[ @iPixel ] - firstSmpl[ @iPixel ].x)) * C_WEIGHTS( @value( kernel_radius2x ) - @iIteration );@end
@end @property( downscale_lq )
@foreach( 2, iPixel )
outColour[ @iPixel ].x += exp(@value(K)*(RDI[ @iPixel * 2 ] - firstSmpl[ @iPixel ].x)) * C_WEIGHTS( @value( kernel_radius2x ) - @iIteration );@end
@end
@foreach( 3, iPixel )
RDI[ @iPixel ] = RDI[ @iPixel + ( 1 ) ];@end
@foreach( 1, iPixel )
RDI[ 4 - 1 + @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @iIteration + @iPixel ] );@end
@end
// Leave the relative space: result = d0 + log( accumulated sum ) / K.
@property( !downscale_lq )
@foreach( 4, iPixel )
outColour[ @iPixel ] = firstSmpl[ @iPixel ].x + log( outColour[ @iPixel ].x ) / @value(K);@end
@end @property( downscale_lq )
@foreach( 2, iPixel )
outColour[ @iPixel ] = firstSmpl[ @iPixel ].x + log( outColour[ @iPixel ].x ) / @value(K);@end
@end
// Example of what an image_store piece looks like (template markers removed
// on purpose so the parser ignores it):
/*
foreach( 4, iPixel )
imageStore( outputImage, ivec2( i2Center + iPixel * i2Inc ), vec4( outColour[ iPixel ], 1.0 ) );end
*/
@insertpiece( image_store )
}
// Entry point. Every thread first copies its share of input samples into
// g_f3LDS, the group synchronizes, and then every thread whose first pixel lies
// inside the output filters its pixels with ComputeFilterKernel().
// The two branches are identical except for which axis is the filter axis.
void main()
{
/// samples_per_threadgroup = 128 + ( ( kernel_radius * 2 + 1 ) - 1 )
/// samples_per_thread = ( 128 + ( ( kernel_radius * 2 + 1 ) - 1 ) ) / ( 128 / 4 )
@padd( samples_per_threadgroup, 127, kernel_radius2x_plus1 )
@pdiv( samples_per_thread, samples_per_threadgroup, 32 )
@property( horizontal_pass )
int iSampleOffset = int( gl_LocalInvocationID.x * @value( samples_per_thread ) );
int iLineOffset = int( gl_LocalInvocationID.y );
// A group covers 128 pixels in x ( << 7 ) and 2 rows in y ( << 1 ). The start is
// moved back by kernel_radius so the left-hand taps are cached too. The
// subtraction is done on unsigned values and converted by the ivec2 constructor.
ivec2 i2GroupCoord = ivec2( ( gl_WorkGroupID.x << 7u ) - @value( kernel_radius )u, gl_WorkGroupID.y << 1u );
ivec2 i2Coord = ivec2( i2GroupCoord.x + iSampleOffset, i2GroupCoord.y );
// Each thread caches samples_per_thread consecutive samples of its row.
@foreach( samples_per_thread, i )
g_f3LDS[ iLineOffset ][ iSampleOffset + @i ] = sampleTex( i2Coord + ivec2( @i, gl_LocalInvocationID.y ) , vec2( 0.5f, 0.0f ) );@end
// samples_per_threadgroup is not necessarily a multiple of 32: the first
// threads additionally cache the leftover samples, counted from the end of the row.
if( gl_LocalInvocationID.x < @value( samples_per_threadgroup )u - 32u * @value( samples_per_thread )u )
{
g_f3LDS[ iLineOffset ][ @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x ] =
sampleTex( i2GroupCoord + ivec2( @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x, gl_LocalInvocationID.y ), vec2( 0.5f, 0.0f ) );
}
//memoryBarrierShared ensures our write is visible to everyone else (must be done BEFORE the barrier)
//barrier ensures every thread's execution reached here.
memoryBarrierShared();
barrier();
int iPixelOffset = int( gl_LocalInvocationID.x << 2u ); //gl_LocalInvocationID.x * 4u
i2Coord = ivec2( i2GroupCoord.x + iPixelOffset, i2GroupCoord.y );
// Undo the kernel_radius shift: i2Coord.x is now this thread's first pixel.
i2Coord.x += @value( kernel_radius );
// Skip threads whose first pixel is at or beyond g_f4OutputSize.x.
if( i2Coord.x < int(g_f4OutputSize.x) )
{
ivec2 i2Center = i2Coord + ivec2( 0, gl_LocalInvocationID.y );
ivec2 i2Inc = ivec2 ( 1, 0 );
@property( downscale )
// Downscale: the coordinate along the filter axis is halved.
i2Center.x = int( uint( i2Center.x ) >> 1u );
@end
ComputeFilterKernel( iPixelOffset, iLineOffset, i2Center, i2Inc );
}
@end @property( !horizontal_pass )
int iSampleOffset = int( gl_LocalInvocationID.x * @value( samples_per_thread ) );
int iLineOffset = int( gl_LocalInvocationID.y );
// Vertical pass: a group covers 2 columns in x ( << 1 ) and 128 pixels in
// y ( << 7 ), moved back by kernel_radius along y.
ivec2 i2GroupCoord = ivec2( gl_WorkGroupID.x << 1u, ( gl_WorkGroupID.y << 7u ) - @value( kernel_radius )u );
ivec2 i2Coord = ivec2( i2GroupCoord.x, i2GroupCoord.y + iSampleOffset );
// Each thread caches samples_per_thread consecutive samples of its column.
@foreach( samples_per_thread, i )
g_f3LDS[ iLineOffset ][ iSampleOffset + @i ] = sampleTex( i2Coord + ivec2( gl_LocalInvocationID.y, @i ) , vec2( 0.0f, 0.5f ) );@end
// The first threads additionally cache the leftover samples (see the
// horizontal branch).
if( gl_LocalInvocationID.x < @value( samples_per_threadgroup )u - 32u * @value( samples_per_thread )u )
{
g_f3LDS[ iLineOffset ][ @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x ] =
sampleTex( i2GroupCoord + ivec2( gl_LocalInvocationID.y, @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x ), vec2( 0.0f, 0.5f ) );
}
//memoryBarrierShared ensures our write is visible to everyone else (must be done BEFORE the barrier)
//barrier ensures every thread's execution reached here.
memoryBarrierShared();
barrier();
int iPixelOffset = int( gl_LocalInvocationID.x << 2u ); //gl_LocalInvocationID.x * 4u
i2Coord = ivec2( i2GroupCoord.x, i2GroupCoord.y + iPixelOffset );
// Undo the kernel_radius shift: i2Coord.y is now this thread's first pixel.
i2Coord.y += @value( kernel_radius );
// Skip threads whose first pixel is at or beyond g_f4OutputSize.y.
if( i2Coord.y < int(g_f4OutputSize.y) )
{
ivec2 i2Center = i2Coord + ivec2( gl_LocalInvocationID.y, 0 );
ivec2 i2Inc = ivec2 ( 0, 1 );
@property( downscale )
// Downscale: the coordinate along the filter axis is halved.
i2Center.y = int( uint( i2Center.y ) >> 1u );
@end
ComputeFilterKernel( iPixelOffset, iLineOffset, i2Center, i2Inc );
}
@end
}

49
test/data/CommonMaterials/Common/GLSL/GaussianBlurLogFilter_ps.glsl

@ -0,0 +1,49 @@
#version ogre_glsl_ver_330
// Interface of the log-space blur fragment shader. main() additionally relies
// on the preprocessor symbols NUM_WEIGHTS, HORIZONTAL_STEP, VERTICAL_STEP and K,
// none of which are defined in this file.

// Source texture; main() reads it with texelFetch, so no sampler is declared.
vulkan_layout( ogre_t0 ) uniform texture2D tex;
// Input interface block; main() in this file does not read uv0.
vulkan_layout( location = 0 )
in block
{
vec2 uv0;
} inPs;
// Single-channel filtered output.
vulkan_layout( location = 0 )
out float fragColour;
// Redeclared built-in; main() uses it as the integer texel position.
in vec4 gl_FragCoord;
// Filter weights. main() pairs weights[ 0 ] with the outermost taps and
// weights[ NUM_WEIGHTS - 1 ] with the centre tap.
vulkan( layout( ogre_P0 ) uniform Params { )
uniform float weights[NUM_WEIGHTS];
vulkan( }; )
// One-dimensional blur evaluated relative to a reference sample:
//   result = ref + log( sum( w_i * exp( K * (d_i - ref) ) ) ) / K
// The reference is the outermost tap on the negative side, whose own term
// reduces to weights[0]. Taps are accumulated from the negative side through
// the centre to the positive side; the accumulation order is kept fixed
// because floating-point addition is order sensitive.
void main()
{
    ivec2 centreTexel = ivec2( gl_FragCoord.xy );

    // Outermost negative tap: used as the reference sample.
    float refSample = texelFetch( tex, centreTexel - ivec2( HORIZONTAL_STEP * (NUM_WEIGHTS - 1),
                                                            VERTICAL_STEP * (NUM_WEIGHTS - 1) ), 0 ).x;
    float weightedSum = weights[0];

    // Remaining negative taps, from distance NUM_WEIGHTS - 2 down to 1.
    for( int tap = NUM_WEIGHTS - 2; tap > 0; --tap )
    {
        float tapValue = texelFetch( tex, centreTexel - ivec2( HORIZONTAL_STEP * tap,
                                                               VERTICAL_STEP * tap ), 0 ).x;
        weightedSum += exp( K * (tapValue - refSample) ) * weights[NUM_WEIGHTS - tap - 1];
    }

    // Centre tap.
    float centreValue = texelFetch( tex, centreTexel, 0 ).x;
    weightedSum += exp( K * (centreValue - refSample) ) * weights[NUM_WEIGHTS - 1];

    // Positive taps, from distance 1 up to NUM_WEIGHTS - 1.
    for( int tap = 1; tap < NUM_WEIGHTS; ++tap )
    {
        float tapValue = texelFetch( tex, centreTexel + ivec2( HORIZONTAL_STEP * tap,
                                                               VERTICAL_STEP * tap ), 0 ).x;
        weightedSum += exp( K * (tapValue - refSample) ) * weights[NUM_WEIGHTS - tap - 1];
    }

    fragColour = refSample + log( weightedSum ) / K;
}

9
test/data/CommonMaterials/Common/GLSL/HiddenAreaMeshVr_ps.glsl

@ -0,0 +1,9 @@
#version ogre_glsl_ver_330
// Only interface of this fragment shader: a four-channel colour output that
// main() fills with zeros.
vulkan_layout( location = 0 )
out vec4 fragColour;
// Writes zero to every channel of the output colour.
void main()
{
    fragColour = vec4( 0.0, 0.0, 0.0, 0.0 );
}

31
test/data/CommonMaterials/Common/GLSL/HiddenAreaMeshVr_vs.glsl

@ -0,0 +1,31 @@
#version ogre_glsl_ver_330
// main() writes gl_ViewportIndex from this vertex shader.
// NOTE(review): presumably the reason this extension is required - confirm.
#extension GL_ARB_shader_viewport_layer_array : require
// HLSL-style aliases used by the declarations and main() below.
#define float2 vec2
#define float3 vec3
#define float4 vec4
#define float4x4 mat4
#define mul( x, y ) ((x) * (y))
// projectionMatrix transforms vertex.xy; rsDepthRange.x is written to gl_Position.z.
vulkan( layout( ogre_P0 ) uniform Params { )
uniform float4x4 projectionMatrix;
uniform float2 rsDepthRange;
vulkan( }; )
// Vertex input: xy is the position, z is converted to the viewport index in main().
vulkan_layout( OGRE_POSITION ) in vec4 vertex;
// NOTE(review): a location qualifier in front of the gl_PerVertex redeclaration
// looks unusual - confirm what vulkan_layout expands to here.
vulkan_layout( location = 0 )
out gl_PerVertex
{
vec4 gl_Position;
};
// Projects the 2D vertex position, places it at depth rsDepthRange.x with
// w = 1, and routes the primitive to the viewport stored in vertex.z.
void main()
{
    vec4 projected = projectionMatrix * vec4( vertex.xy, 0.0, 1.0 );

    gl_Position = vec4( projected.xy, rsDepthRange.x, 1.0 );
    gl_ViewportIndex = int( vertex.z );
}

40
test/data/CommonMaterials/Common/GLSL/MipmapsGaussianBlur_cs.glsl

@ -0,0 +1,40 @@
//Based on GPUOpen's samples SeparableFilter11
//https://github.com/GPUOpen-LibrariesAndSDKs/SeparableFilter11
//For better understanding, read "Efficient Compute Shader Programming" from Bill Bilodeau
//http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/Efficient%20Compute%20Shader%20Programming.pps
//TL;DR:
// * Each thread works on 4 pixels at a time (for VLIW hardware, i.e. Radeon HD 5000 & 6000 series).
// * 256 pixels per threadgroup. Each threadgroup works on 2 rows of 128 pixels each.
// That means 32x2 threads = 64. 64 threads x 4 pixels per thread = 256
@piece( data_type )vec3@end
@piece( lds_data_type )vec3@end
@piece( lds_definition )
shared vec3 g_f3LDS[ 2 ] [ @value( samples_per_threadgroup ) ];
@end
@piece( extra_params )
uniform float srcLodIdx;
@en