Browse Source

Ogre now successfully creates window

SDLAttempt
Macoy Madson 1 month ago
parent
commit
d2d0d5f103
100 changed files with 5397 additions and 38 deletions
  1. +4
    -1
      .gitignore
  2. +3
    -1
      Build_Debug.sh
  3. +11
    -1
      ReadMe.org
  4. +55
    -35
      src/OgreInitialize.cpp
  5. +8
    -0
      test/CopyOgreData.sh
  6. +1
    -0
      test/Jamfile
  7. +47
    -0
      test/data/CommonMaterials/Common/Any/PccDepthCompressor_ps.any
  8. +225
    -0
      test/data/CommonMaterials/Common/Copyback.material
  9. +65
    -0
      test/data/CommonMaterials/Common/DPM.material
  10. +134
    -0
      test/data/CommonMaterials/Common/DPSM.material
  11. +67
    -0
      test/data/CommonMaterials/Common/DepthUtils.material
  12. +117
    -0
      test/data/CommonMaterials/Common/EsmGaussianBlurLogFilter.material
  13. +101
    -0
      test/data/CommonMaterials/Common/EsmGaussianBlurLogFilter.material.json
  14. +18
    -0
      test/data/CommonMaterials/Common/GLSL/Copyback_1xFP32_ps.glsl
  15. +22
    -0
      test/data/CommonMaterials/Common/GLSL/Copyback_4xFP32_2DArray_ps.glsl
  16. +18
    -0
      test/data/CommonMaterials/Common/GLSL/Copyback_4xFP32_ps.glsl
  17. +28
    -0
      test/data/CommonMaterials/Common/GLSL/CubeToDpm_4xFP16_ps.glsl
  18. +37
    -0
      test/data/CommonMaterials/Common/GLSL/CubeToDpsm_ps.glsl
  19. +17
    -0
      test/data/CommonMaterials/Common/GLSL/DepthDownscaleMax_ps.glsl
  20. +22
    -0
      test/data/CommonMaterials/Common/GLSL/EsmGaussianBlurLogFilter_cs.glsl
  21. +263
    -0
      test/data/CommonMaterials/Common/GLSL/GaussianBlurBase_cs.glsl
  22. +285
    -0
      test/data/CommonMaterials/Common/GLSL/GaussianBlurLogFilterBase_cs.glsl
  23. +49
    -0
      test/data/CommonMaterials/Common/GLSL/GaussianBlurLogFilter_ps.glsl
  24. +9
    -0
      test/data/CommonMaterials/Common/GLSL/HiddenAreaMeshVr_ps.glsl
  25. +31
    -0
      test/data/CommonMaterials/Common/GLSL/HiddenAreaMeshVr_vs.glsl
  26. +40
    -0
      test/data/CommonMaterials/Common/GLSL/MipmapsGaussianBlur_cs.glsl
  27. +52
    -0
      test/data/CommonMaterials/Common/GLSL/PccDepthCompressor_ps.glsl
  28. +28
    -0
      test/data/CommonMaterials/Common/GLSL/QuadCameraDirNoUV_vs.glsl
  29. +28
    -0
      test/data/CommonMaterials/Common/GLSL/QuadCameraDir_vs.glsl
  30. +25
    -0
      test/data/CommonMaterials/Common/GLSL/Quad_vs.glsl
  31. +42
    -0
      test/data/CommonMaterials/Common/GLSL/RadialDensityMask_ps.glsl
  32. +28
    -0
      test/data/CommonMaterials/Common/GLSL/RadialDensityMask_vs.glsl
  33. +19
    -0
      test/data/CommonMaterials/Common/GLSL/Resolve_1xFP32_Subsample0_ps.glsl
  34. +20
    -0
      test/data/CommonMaterials/Common/GLSL/SkyCubemap_ps.glsl
  35. +37
    -0
      test/data/CommonMaterials/Common/GLSL/SkyEquirectangular_ps.glsl
  36. +35
    -0
      test/data/CommonMaterials/Common/GLSLES/CubeToDpsm_ps.glsles
  37. +25
    -0
      test/data/CommonMaterials/Common/GLSLES/QuadCameraDir_vs.glsles
  38. +22
    -0
      test/data/CommonMaterials/Common/GLSLES/Quad_vs.glsles
  39. +8
    -0
      test/data/CommonMaterials/Common/HLSL/Copyback_1xFP32_ps.hlsl
  40. +8
    -0
      test/data/CommonMaterials/Common/HLSL/Copyback_4xFP32_2DArray_ps.hlsl
  41. +8
    -0
      test/data/CommonMaterials/Common/HLSL/Copyback_4xFP32_ps.hlsl
  42. +25
    -0
      test/data/CommonMaterials/Common/HLSL/CubeToDpm_4xFP16_ps.hlsl
  43. +30
    -0
      test/data/CommonMaterials/Common/HLSL/CubeToDpsm_ps.hlsl
  44. +21
    -0
      test/data/CommonMaterials/Common/HLSL/DepthDownscaleMax_ps.hlsl
  45. +22
    -0
      test/data/CommonMaterials/Common/HLSL/EsmGaussianBlurLogFilter_cs.hlsl
  46. +242
    -0
      test/data/CommonMaterials/Common/HLSL/GaussianBlurBase_cs.hlsl
  47. +263
    -0
      test/data/CommonMaterials/Common/HLSL/GaussianBlurLogFilterBase_cs.hlsl
  48. +43
    -0
      test/data/CommonMaterials/Common/HLSL/GaussianBlurLogFilter_ps.hlsl
  49. +4
    -0
      test/data/CommonMaterials/Common/HLSL/HiddenAreaMeshVr_ps.hlsl
  50. +27
    -0
      test/data/CommonMaterials/Common/HLSL/HiddenAreaMeshVr_vs.hlsl
  51. +37
    -0
      test/data/CommonMaterials/Common/HLSL/MipmapsGaussianBlur_cs.hlsl
  52. +39
    -0
      test/data/CommonMaterials/Common/HLSL/PccDepthCompressor_ps.hlsl
  53. +31
    -0
      test/data/CommonMaterials/Common/HLSL/QuadCameraDirNoUV_vs.hlsl
  54. +31
    -0
      test/data/CommonMaterials/Common/HLSL/QuadCameraDir_vs.hlsl
  55. +25
    -0
      test/data/CommonMaterials/Common/HLSL/Quad_vs.hlsl
  56. +35
    -0
      test/data/CommonMaterials/Common/HLSL/RadialDensityMask_ps.hlsl
  57. +31
    -0
      test/data/CommonMaterials/Common/HLSL/RadialDensityMask_vs.hlsl
  58. +16
    -0
      test/data/CommonMaterials/Common/HLSL/Resolve_1xFP32_Subsample0_ps.hlsl
  59. +16
    -0
      test/data/CommonMaterials/Common/HLSL/SkyCubemap_ps.hlsl
  60. +25
    -0
      test/data/CommonMaterials/Common/HLSL/SkyEquirectangular_ps.hlsl
  61. +95
    -0
      test/data/CommonMaterials/Common/HiddenAreaMeshVr.material
  62. +17
    -0
      test/data/CommonMaterials/Common/Metal/Copyback_1xFP32_ps.metal
  63. +18
    -0
      test/data/CommonMaterials/Common/Metal/Copyback_4xFP32_2DArray_ps.metal
  64. +17
    -0
      test/data/CommonMaterials/Common/Metal/Copyback_4xFP32_ps.metal
  65. +28
    -0
      test/data/CommonMaterials/Common/Metal/CubeToDpm_4xFP16_ps.metal
  66. +37
    -0
      test/data/CommonMaterials/Common/Metal/CubeToDpsm_ps.metal
  67. +33
    -0
      test/data/CommonMaterials/Common/Metal/DepthDownscaleMax_ps.metal
  68. +25
    -0
      test/data/CommonMaterials/Common/Metal/EsmGaussianBlurLogFilter_cs.metal
  69. +269
    -0
      test/data/CommonMaterials/Common/Metal/GaussianBlurBase_cs.metal
  70. +284
    -0
      test/data/CommonMaterials/Common/Metal/GaussianBlurLogFilterBase_cs.metal
  71. +51
    -0
      test/data/CommonMaterials/Common/Metal/GaussianBlurLogFilter_ps.metal
  72. +7
    -0
      test/data/CommonMaterials/Common/Metal/HiddenAreaMeshVr_ps.metal
  73. +35
    -0
      test/data/CommonMaterials/Common/Metal/HiddenAreaMeshVr_vs.metal
  74. +42
    -0
      test/data/CommonMaterials/Common/Metal/MipmapsGaussianBlur_cs.metal
  75. +45
    -0
      test/data/CommonMaterials/Common/Metal/PccDepthCompressor_ps.metal
  76. +37
    -0
      test/data/CommonMaterials/Common/Metal/QuadCameraDirNoUV_vs.metal
  77. +32
    -0
      test/data/CommonMaterials/Common/Metal/QuadCameraDir_vs.metal
  78. +28
    -0
      test/data/CommonMaterials/Common/Metal/Quad_vs.metal
  79. +41
    -0
      test/data/CommonMaterials/Common/Metal/RadialDensityMask_ps.metal
  80. +38
    -0
      test/data/CommonMaterials/Common/Metal/RadialDensityMask_vs.metal
  81. +17
    -0
      test/data/CommonMaterials/Common/Metal/Resolve_1xFP32_Subsample0_ps.metal
  82. +18
    -0
      test/data/CommonMaterials/Common/Metal/SkyCubemap_ps.metal
  83. +27
    -0
      test/data/CommonMaterials/Common/Metal/SkyEquirectangular_ps.metal
  84. +87
    -0
      test/data/CommonMaterials/Common/Mipmaps.material.json
  85. +74
    -0
      test/data/CommonMaterials/Common/PccDepthCompressor.material
  86. +120
    -0
      test/data/CommonMaterials/Common/Quad.program
  87. +90
    -0
      test/data/CommonMaterials/Common/RadialDensityMask.material
  88. +137
    -0
      test/data/CommonMaterials/Common/Sky.material
  89. BIN
      test/data/CommonMaterials/Common/brtfLutDfg.dds
  90. BIN
      test/data/CommonMaterials/Common/ltcMatrix0.dds
  91. BIN
      test/data/CommonMaterials/Common/ltcMatrix1.dds
  92. +115
    -0
      test/data/Hlms/Common/Any/Cubemap_piece_all.any
  93. +17
    -0
      test/data/Hlms/Common/Any/DualParaboloid_piece_ps.any
  94. +10
    -0
      test/data/Hlms/Common/Any/ReverseDepthHelpers_piece_ps.any
  95. +44
    -0
      test/data/Hlms/Common/Any/ShadowCaster_piece_ps.any
  96. +47
    -0
      test/data/Hlms/Common/Any/ShadowCaster_piece_vs.any
  97. +7
    -0
      test/data/Hlms/Common/Any/UnpackHelpers_piece_all.any
  98. +224
    -0
      test/data/Hlms/Common/GLSL/CrossPlatformSettings_piece_all.glsl
  99. +104
    -0
      test/data/Hlms/Common/GLSL/Matrix_piece_all.glsl
  100. +55
    -0
      test/data/Hlms/Common/GLSL/QuaternionCode_piece_all.glsl

+ 4
- 1
.gitignore View File

@@ -36,4 +36,7 @@
cakelisp_cache/

bin/
lib/
lib/

test/ogreApp
test/output/

+ 3
- 1
Build_Debug.sh View File

@@ -3,4 +3,6 @@
./Dependencies/cakelisp/bin/cakelisp src/OgreCore.cake \
&& jam -j4 -sDEBUG_BUILD=true libGameLib.a

./Dependencies/cakelisp/bin/cakelisp test/OgreApp.cake && cd test && jam -j4 -sDEBUG_BUILD=true
# TODO: Figure out how to remove clean by teaching jam about libGameLib.a (dyn link instead?)
./Dependencies/cakelisp/bin/cakelisp test/OgreApp.cake && cd test \
&& jam clean && jam -j4 -sDEBUG_BUILD=true

+ 11
- 1
ReadMe.org View File

@@ -3,8 +3,18 @@
GameLib is my library for making games. It is the successor to [[https://macoy.me/code/macoy/base2.0][base2.0]].

* Setup
Clone the repository and its dependencies:
#+BEGIN_SRC sh
git clone https://macoy.me/code/macoy/gamelib.git
cd gamelib
git submodule update --init --recursive
#+END_SRC

Build dependencies:
#+BEGIN_SRC sh
./BuildDependencies_Debug.sh
#+END_SRC

Build:
#+BEGIN_SRC sh
./Build_Debug.sh
#+END_SRC

+ 55
- 35
src/OgreInitialize.cpp View File

@@ -1,3 +1,5 @@
#include <stdio.h>

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Woverloaded-virtual"
#include "OgreArchiveManager.h"
@@ -121,7 +123,7 @@ void OgreInitialize()
using namespace Ogre;

const String pluginsFolder = "./data/";
const String writeAccessFolder = "./";
const String writeAccessFolder = "./output/";

#ifndef OGRE_STATIC_LIB
#if OGRE_DEBUG_MODE
@@ -134,41 +136,58 @@ void OgreInitialize()
writeAccessFolder + "ogre.cfg", //
writeAccessFolder + "Ogre.log");

// This allows the user to configure the graphics. It's damn annoying during dev though
// TODO: Make this return false and quit the app
if (!root->showConfigDialog())
return; // false
// if (!root->showConfigDialog())
// return; // false

// Initialize Root
root->getRenderSystem()->setConfigOption("sRGB Gamma Conversion", "Yes");
// Window* window = root->initialise(/*autoCreateWindow=*/true, "Spargus Ogre");
root->initialise(/*autoCreateWindow=*/true);

Ogre::NameValuePairList windowSettings;
// unsigned long winHandle = reinterpret_cast<unsigned long>(win.getBase()->getSystemHandle());
// #ifdef _WIN32
// unsigned long winGlContext = reinterpret_cast<unsigned long>(wglGetCurrentContext());

// windowSettings["externalWindowHandle"] = StringConverter::toString(winHandle);
// windowSettings["externalGLContext"] = StringConverter::toString(winGlContext);
// windowSettings["externalGLControl"] = String("True");
// #else
// // Deprecated. See ogre-next/RenderSystems/GL3Plus/src/windowing/GLX/OgreGLXWindow.cpp:237
// // windowSettings["externalWindowHandle"] = StringConverter::toString(winHandle);
// windowSettings["parentWindowHandle"] = StringConverter::toString(winHandle);
// // sf::Context context;
// // unsigned long activeContextId = (unsigned long)context.getActiveContextId();
// windowSettings["currentGLContext"] = String("True");
// // windowSettings["currentGLContext"] = StringConverter::toString(activeContextId);
// // windowSettings["externalGLControl"] = String("True");
// #endif

int winWidth = 1024;
int winHeight = 1024;

Window* window =
root->createRenderWindow("GameLib", winWidth, winHeight, true, &windowSettings);

// window->_setVisible(true);
RenderSystem* renderSystem = root->getRenderSystemByName("OpenGL 3+ Rendering Subsystem");
if (!renderSystem)
{
printf("Render system not found!\n");
return;
}

// renderSystem->setConfigOption("Display Frequency", "N/A");
renderSystem->setConfigOption("Full Screen", "No");
// renderSystem->setConfigOption("VSync", "Yes");
renderSystem->setConfigOption("Video Mode", "1920 x 1080");
renderSystem->setConfigOption("sRGB Gamma Conversion", "Yes");
root->setRenderSystem(renderSystem);
Window* window = root->initialise(/*autoCreateWindow=*/true, "GameLib");

// Old bad attempt at Ogre within SFML (only partly worked)
{
// Ogre::NameValuePairList windowSettings;
// unsigned long winHandle = reinterpret_cast<unsigned
// long>(win.getBase()->getSystemHandle()); #ifdef _WIN32 unsigned long winGlContext =
// reinterpret_cast<unsigned long>(wglGetCurrentContext());

// windowSettings["externalWindowHandle"] = StringConverter::toString(winHandle);
// windowSettings["externalGLContext"] = StringConverter::toString(winGlContext);
// windowSettings["externalGLControl"] = String("True");
// #else
// // Deprecated. See
// ogre-next/RenderSystems/GL3Plus/src/windowing/GLX/OgreGLXWindow.cpp:237
// // windowSettings["externalWindowHandle"] = StringConverter::toString(winHandle);
// windowSettings["parentWindowHandle"] = StringConverter::toString(winHandle);
// // sf::Context context;
// // unsigned long activeContextId = (unsigned long)context.getActiveContextId();
// windowSettings["currentGLContext"] = String("True");
// // windowSettings["currentGLContext"] = StringConverter::toString(activeContextId);
// // windowSettings["externalGLControl"] = String("True");
// #endif

// int winWidth = 1920;
// int winHeight = 1080;

// Window* window =
// // root->createRenderWindow("GameLib", winWidth, winHeight, true, &windowSettings);
// root->createRenderWindow("GameLib", winWidth, winHeight, true);

// window->_setVisible(true);
}

registerHlms();

@@ -189,7 +208,7 @@ void OgreInitialize()

// Setup a basic compositor with a blue clear colour
CompositorManager2* compositorManager = root->getCompositorManager2();
const String workspaceName("Demo Workspace");
const String workspaceName("Main Workspace");
// const IdString definitionNameId = workspaceName;
const ColourValue backgroundColour(0.2f, 0.4f, 0.6f);
compositorManager->createBasicWorkspaceDef(workspaceName, backgroundColour, IdString());
@@ -197,6 +216,7 @@ void OgreInitialize()
true);

// Mesh importing
if (false)
{
Ogre::ResourceGroupManager::getSingleton().addResourceLocation("data/Models", "FileSystem",
"Models");
@@ -237,7 +257,7 @@ void OgreInitialize()
{
return;
}
// WTF
// WTF - this should work, but it's almost like the root scene node's vtable is wrong
// Ogre::SceneNode* sceneNode = rootSceneNode->createChildSceneNode(Ogre::SCENE_DYNAMIC);
Ogre::SceneNode* sceneNode = static_cast<SceneNode*>(
rootSceneNode->createChild()); // rootSceneNode->createChildSceneNode();


+ 8
- 0
test/CopyOgreData.sh View File

@@ -0,0 +1,8 @@
#!/bin/sh
# Copies the Ogre-next sample Hlms templates and common materials into the
# data/ directory that sits next to this script.
#
# Every path below is relative to this script's directory, so switch there
# first; this lets the script be invoked from any working directory.
# Any failing step aborts the script with that step's exit status.

cd "$(dirname "$0")" || exit $?

mkdir -p data/Hlms || exit $?
mkdir -p data/CommonMaterials || exit $?
rsync -av ../Dependencies/ogre-next/Samples/Media/Hlms/Common data/Hlms/ || exit $?
rsync -av ../Dependencies/ogre-next/Samples/Media/Hlms/Pbs data/Hlms/ || exit $?
rsync -av ../Dependencies/ogre-next/Samples/Media/Hlms/Unlit data/Hlms/ || exit $?
rsync -av ../Dependencies/ogre-next/Samples/Media/2.0/scripts/materials/Common data/CommonMaterials || exit $?

+ 1
- 0
test/Jamfile View File

@@ -1,3 +1,4 @@
SubDir . ;

# TODO: This should depend on libGameLib.a so that Jam knows it needs to re-link when GameLib updates
Main ogreApp : OgreApp.cake.cpp ;

+ 47
- 0
test/data/CommonMaterials/Common/Any/PccDepthCompressor_ps.any View File

@@ -0,0 +1,47 @@

//#include "SyntaxHighlightingMisc.h"

#ifdef HEADER
/**
Finds the intersection between the cube of half size probeShapeHalfSize and center at origin
and the reflDir, so that posLS + reflDirLS * retVal = intersectionPos;

reflDirLS           Ray direction. Presumably expressed in the probe's local space ("LS") -- confirm
                    against callers.
posLS               Ray origin, in the same space as reflDirLS.
probeShapeHalfSize  Per-axis half extents of the box.

Returns the smallest, across the three axes, of the larger per-axis plane intersection parameter.

NOTE(review): a zero component in reflDirLS makes the reciprocal below a division by zero;
confirm the target shading languages give a usable result in that case.
*/
INLINE float findIntersectionDistance( float3 reflDirLS, float3 posLS, float3 probeShapeHalfSize )
{
//Find the ray intersection with box plane
// The reciprocal is computed once and shared by both plane tests.
float3 invReflDirLS = float3( 1.0, 1.0, 1.0 ) / reflDirLS;
float3 intersectAtMinPlane = ( -probeShapeHalfSize - posLS ) * invReflDirLS;
float3 intersectAtMaxPlane = ( probeShapeHalfSize - posLS ) * invReflDirLS;
//Get the largest intersection values (we are not interested in negative values)
float3 largestIntersect = max( intersectAtMaxPlane.xyz, intersectAtMinPlane.xyz );
//Get the closest of all solutions
float distance = min( min( largestIntersect.x, largestIntersect.y ), largestIntersect.z );
return distance;
}

#else

float fDepth = OGRE_Sample( depthTexture, pointSampler, inPs.uv0 ).x;

float linearDepth = p_projectionParams.y / (fDepth - p_projectionParams.x);

float3 viewSpacePosition = inPs.cameraDir * linearDepth;

float fDist = length( viewSpacePosition.xyz );
float3 probeToPosDir = viewSpacePosition / fDist;

probeToPosDir = mul( p_viewSpaceToProbeLocalSpace, probeToPosDir );

float fApproxDist = findIntersectionDistance( probeToPosDir, p_cameraPosLS, p_probeShapeHalfSize );

//We can't store fDist directly because we have limited precision (often 8 bits)
//Thus we store it in terms of
// fApproxDist * alpha = fDist;
//During render we'll know fApproxDist and alpha, but want to know fDist
//We also know alpha >= 0
//For precision issues and because it's good enough, we force alpha <= 2.0
float alpha = fDist / fApproxDist;
alpha *= 0.5;
alpha = min( alpha, 1.0 );

#endif

+ 225
- 0
test/data/CommonMaterials/Common/Copyback.material View File

@@ -0,0 +1,225 @@
fragment_program Ogre/Copy/4xFP32_ps_HLSL hlsl
{
source Copyback_4xFP32_ps.hlsl
entry_point main
target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3
}
fragment_program Ogre/Copy/4xFP32_ps_GLSL glsl
{
source Copyback_4xFP32_ps.glsl
default_params { param_named tex int 0 }
}
fragment_program Ogre/Copy/4xFP32_ps_VK glslvk
{
source Copyback_4xFP32_ps.glsl
}
fragment_program Ogre/Copy/4xFP32_ps_Metal metal
{
source Copyback_4xFP32_ps.metal
shader_reflection_pair_hint Ogre/Compositor/Quad_vs
}
fragment_program Ogre/Copy/4xFP32_ps unified
{
delegate Ogre/Copy/4xFP32_ps_GLSL
delegate Ogre/Copy/4xFP32_ps_VK
delegate Ogre/Copy/4xFP32_ps_HLSL
delegate Ogre/Copy/4xFP32_ps_Metal
}
material Ogre/Copy/4xFP32
{
technique
{
pass
{
depth_check off
depth_write off
cull_hardware none
vertex_program_ref Ogre/Compositor/Quad_vs
{
}
fragment_program_ref Ogre/Copy/4xFP32_ps
{
}
texture_unit
{
filtering none
tex_address_mode clamp
}
}
}
}
fragment_program Ogre/Copy/4xFP32_2DArray_ps_HLSL hlsl
{
source Copyback_4xFP32_2DArray_ps.hlsl
entry_point main
target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3
}
fragment_program Ogre/Copy/4xFP32_2DArray_ps_GLSL glsl
{
source Copyback_4xFP32_2DArray_ps.glsl
default_params { param_named tex int 0 }
}
fragment_program Ogre/Copy/4xFP32_2DArray_ps_VK glslvk
{
source Copyback_4xFP32_2DArray_ps.glsl
}
fragment_program Ogre/Copy/4xFP32_2DArray_ps_Metal metal
{
source Copyback_4xFP32_2DArray_ps.metal
shader_reflection_pair_hint Ogre/Compositor/Quad_vs
}
fragment_program Ogre/Copy/4xFP32_2DArray_ps unified
{
delegate Ogre/Copy/4xFP32_2DArray_ps_GLSL
delegate Ogre/Copy/4xFP32_2DArray_ps_VK
delegate Ogre/Copy/4xFP32_2DArray_ps_HLSL
delegate Ogre/Copy/4xFP32_2DArray_ps_Metal
}
material Ogre/Copy/4xFP32_2DArray
{
technique
{
pass
{
depth_check off
depth_write off
cull_hardware none
vertex_program_ref Ogre/Compositor/Quad_vs
{
}
fragment_program_ref Ogre/Copy/4xFP32_2DArray_ps
{
param_named sliceIdx float 0
}
texture_unit
{
filtering none
tex_address_mode clamp
}
}
}
}
fragment_program Ogre/Copy/1xFP32_ps_HLSL hlsl
{
source Copyback_1xFP32_ps.hlsl
entry_point main
target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3
}
fragment_program Ogre/Copy/1xFP32_ps_GLSL glsl
{
source Copyback_1xFP32_ps.glsl
default_params { param_named tex int 0 }
}
fragment_program Ogre/Copy/1xFP32_ps_VK glslvk
{
source Copyback_1xFP32_ps.glsl
}
fragment_program Ogre/Copy/1xFP32_ps_Metal metal
{
source Copyback_1xFP32_ps.metal
shader_reflection_pair_hint Ogre/Compositor/Quad_vs
}
fragment_program Ogre/Copy/1xFP32_ps unified
{
delegate Ogre/Copy/1xFP32_ps_GLSL
delegate Ogre/Copy/1xFP32_ps_VK
delegate Ogre/Copy/1xFP32_ps_HLSL
delegate Ogre/Copy/1xFP32_ps_Metal
}
material Ogre/Copy/1xFP32 : Ogre/Copy/4xFP32
{
technique
{
pass
{
fragment_program_ref Ogre/Copy/1xFP32_ps
{
}
}
}
}
fragment_program Ogre/Resolve/1xFP32_Subsample0_ps_GLSL glsl
{
source Resolve_1xFP32_Subsample0_ps.glsl
default_params { param_named tex int 0 }
}
fragment_program Ogre/Resolve/1xFP32_Subsample0_ps_VK glslvk
{
source Resolve_1xFP32_Subsample0_ps.glsl
}
fragment_program Ogre/Resolve/1xFP32_Subsample0_ps_HLSL hlsl
{
source Resolve_1xFP32_Subsample0_ps.hlsl
entry_point main
target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3
}
fragment_program Ogre/Resolve/1xFP32_Subsample0_ps_Metal metal
{
source Resolve_1xFP32_Subsample0_ps.metal
shader_reflection_pair_hint Ogre/Compositor/Quad_vs
}
fragment_program Ogre/Resolve/1xFP32_Subsample0_ps unified
{
delegate Ogre/Resolve/1xFP32_Subsample0_ps_GLSL
delegate Ogre/Resolve/1xFP32_Subsample0_ps_VK
delegate Ogre/Resolve/1xFP32_Subsample0_ps_HLSL
delegate Ogre/Resolve/1xFP32_Subsample0_ps_Metal
}
material Ogre/Resolve/1xFP32_Subsample0
{
technique
{
pass
{
depth_check off
depth_write off
cull_hardware none
vertex_program_ref Ogre/Compositor/Quad_vs
{
}
fragment_program_ref Ogre/Resolve/1xFP32_Subsample0_ps
{
}
texture_unit
{
filtering none
tex_address_mode clamp
}
}
}
}

+ 65
- 0
test/data/CommonMaterials/Common/DPM.material View File

@@ -0,0 +1,65 @@
//DPM stands for Dual Paraboloid Mapping.

fragment_program Ogre/DPM/CubeToDpm_4xFP16_ps_GLSL glsl
{
source CubeToDpm_4xFP16_ps.glsl
default_params
{
param_named cubeTexture int 0
}
}

fragment_program Ogre/DPM/CubeToDpm_4xFP16_ps_VK glslvk
{
source CubeToDpm_4xFP16_ps.glsl
}

fragment_program Ogre/DPM/CubeToDpm_4xFP16_ps_HLSL hlsl
{
source CubeToDpm_4xFP16_ps.hlsl
entry_point main
target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3
}

fragment_program Ogre/DPM/CubeToDpm_4xFP16_ps_Metal metal
{
source CubeToDpm_4xFP16_ps.metal
shader_reflection_pair_hint Ogre/Compositor/Quad_vs
}

fragment_program Ogre/DPM/CubeToDpm_4xFP16_ps unified
{
delegate Ogre/DPM/CubeToDpm_4xFP16_ps_GLSL
delegate Ogre/DPM/CubeToDpm_4xFP16_ps_VK
delegate Ogre/DPM/CubeToDpm_4xFP16_ps_HLSL
delegate Ogre/DPM/CubeToDpm_4xFP16_ps_Metal
}

// Converts a cubemap to DPM in the pixel shader.
material Ogre/DPM/CubeToDpm
{
technique
{
pass
{
depth_check off
depth_func always_pass

cull_hardware none

vertex_program_ref Ogre/Compositor/Quad_vs
{
}

fragment_program_ref Ogre/DPM/CubeToDpm_4xFP16_ps
{
}

texture_unit depthTexture
{
filtering bilinear
tex_address_mode clamp
}
}
}
}

+ 134
- 0
test/data/CommonMaterials/Common/DPSM.material View File

@@ -0,0 +1,134 @@
//DPSM stands for Dual Paraboloid Shadow Mapping.

fragment_program Ogre/DPSM/CubeToDpsm_ps_GLSL glsl
{
source CubeToDpsm_ps.glsl
default_params
{
param_named depthTexture int 0
}
}
fragment_program Ogre/DPSM/CubeToDpsm_Colour_ps_GLSL glsl : Ogre/DPSM/CubeToDpsm_ps_GLSL
{
preprocessor_defines OUTPUT_TO_COLOUR=1
}

fragment_program Ogre/DPSM/CubeToDpsm_ps_VK glslvk
{
source CubeToDpsm_ps.glsl
}
fragment_program Ogre/DPSM/CubeToDpsm_Colour_ps_VK glslvk : Ogre/DPSM/CubeToDpsm_ps_VK
{
preprocessor_defines OUTPUT_TO_COLOUR=1
}

fragment_program Ogre/DPSM/CubeToDpsm_ps_GLSLES glsles
{
source CubeToDpsm_ps.glsles
default_params
{
param_named depthTexture int 0
}
}
fragment_program Ogre/DPSM/CubeToDpsm_Colour_ps_GLSLES glsles : Ogre/DPSM/CubeToDpsm_ps_GLSLES
{
preprocessor_defines OUTPUT_TO_COLOUR=1
}

fragment_program Ogre/DPSM/CubeToDpsm_ps_HLSL hlsl
{
source CubeToDpsm_ps.hlsl
entry_point main
target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3
}
fragment_program Ogre/DPSM/CubeToDpsm_Colour_ps_HLSL hlsl : Ogre/DPSM/CubeToDpsm_ps_HLSL
{
preprocessor_defines OUTPUT_TO_COLOUR=1
}

fragment_program Ogre/DPSM/CubeToDpsm_ps_Metal metal
{
source CubeToDpsm_ps.metal
shader_reflection_pair_hint Ogre/Compositor/Quad_vs
}
fragment_program Ogre/DPSM/CubeToDpsm_Colour_ps_Metal metal : Ogre/DPSM/CubeToDpsm_ps_Metal
{
preprocessor_defines OUTPUT_TO_COLOUR=1
}

fragment_program Ogre/DPSM/CubeToDpsm_ps unified
{
delegate Ogre/DPSM/CubeToDpsm_ps_GLSL
delegate Ogre/DPSM/CubeToDpsm_ps_GLSLES
delegate Ogre/DPSM/CubeToDpsm_ps_VK
delegate Ogre/DPSM/CubeToDpsm_ps_HLSL
delegate Ogre/DPSM/CubeToDpsm_ps_Metal
}
fragment_program Ogre/DPSM/CubeToDpsm_Colour_ps unified
{
delegate Ogre/DPSM/CubeToDpsm_Colour_ps_GLSL
delegate Ogre/DPSM/CubeToDpsm_Colour_ps_GLSLES
delegate Ogre/DPSM/CubeToDpsm_Colour_ps_VK
delegate Ogre/DPSM/CubeToDpsm_Colour_ps_HLSL
delegate Ogre/DPSM/CubeToDpsm_Colour_ps_Metal
}

// Converts a cubemap to DPSM in the pixel shader.
material Ogre/DPSM/CubeToDpsm
{
technique
{
pass
{
//Depth writes must be on, since we write directly to the depth buffer.
depth_check on
depth_write on

depth_func always_pass

cull_hardware none

vertex_program_ref Ogre/Compositor/Quad_vs
{
}

fragment_program_ref Ogre/DPSM/CubeToDpsm_ps
{
}

texture_unit depthTexture
{
filtering none
tex_address_mode clamp
}
}
}
}

material Ogre/DPSM/CubeToDpsmColour
{
technique
{
pass
{
depth_check off
depth_write off

cull_hardware none

vertex_program_ref Ogre/Compositor/Quad_vs
{
}

fragment_program_ref Ogre/DPSM/CubeToDpsm_Colour_ps
{
}

texture_unit depthTexture
{
filtering none
tex_address_mode clamp
}
}
}
}

+ 67
- 0
test/data/CommonMaterials/Common/DepthUtils.material View File

@@ -0,0 +1,67 @@

fragment_program Ogre/Depth/DownscaleMax_ps_GLSL glsl
{
source DepthDownscaleMax_ps.glsl
default_params
{
param_named depthTexture int 0
}
}

fragment_program Ogre/Depth/DownscaleMax_ps_VK glslvk
{
source DepthDownscaleMax_ps.glsl
}

fragment_program Ogre/Depth/DownscaleMax_ps_HLSL hlsl
{
source DepthDownscaleMax_ps.hlsl
entry_point main
target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3
}

fragment_program Ogre/Depth/DownscaleMax_ps_Metal metal
{
source DepthDownscaleMax_ps.metal
shader_reflection_pair_hint Ogre/Compositor/Quad_vs
}

fragment_program Ogre/Depth/DownscaleMax_ps unified
{
delegate Ogre/Depth/DownscaleMax_ps_GLSL
delegate Ogre/Depth/DownscaleMax_ps_VK
delegate Ogre/Depth/DownscaleMax_ps_HLSL
delegate Ogre/Depth/DownscaleMax_ps_Metal
}

// Downscales resolution of input depth texture by half (w/2 x h/2)
// using a max filter (max depth of all 4 neighbours)
material Ogre/Depth/DownscaleMax
{
technique
{
pass
{
depth_check on
depth_write on
depth_func always_pass

cull_hardware none

vertex_program_ref Ogre/Compositor/Quad_vs
{
}

fragment_program_ref Ogre/Depth/DownscaleMax_ps
{
}

texture_unit depthTexture
{
filtering none
tex_address_mode clamp
}
}
}
}

+ 117
- 0
test/data/CommonMaterials/Common/EsmGaussianBlurLogFilter.material View File

@@ -0,0 +1,117 @@
// Horizontal-step variant (HORIZONTAL_STEP=1, VERTICAL_STEP=0) of the Gaussian log filter, GLSL.
// Each macro is defined exactly once, matching the define list of the HLSL and Metal variants.
fragment_program ESM/GaussianLogFilterH_ps_GLSL glsl
{
source GaussianBlurLogFilter_ps.glsl
preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=1,VERTICAL_STEP=0
default_params
{
// Binds the "tex" sampler uniform to texture unit 0.
param_named tex int 0
}
}
fragment_program ESM/GaussianLogFilterV_ps_GLSL glsl : ESM/GaussianLogFilterH_ps_GLSL
{
preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=0,VERTICAL_STEP=1
}
// Horizontal-step variant (HORIZONTAL_STEP=1, VERTICAL_STEP=0) of the Gaussian log filter, Vulkan GLSL.
// Each macro is defined exactly once, matching the define list of the HLSL and Metal variants.
fragment_program ESM/GaussianLogFilterH_ps_VK glslvk
{
source GaussianBlurLogFilter_ps.glsl
preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=1,VERTICAL_STEP=0
}
fragment_program ESM/GaussianLogFilterV_ps_VK glslvk : ESM/GaussianLogFilterH_ps_VK
{
preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=0,VERTICAL_STEP=1
}
fragment_program ESM/GaussianLogFilterH_ps_HLSL hlsl
{
source GaussianBlurLogFilter_ps.hlsl
entry_point main
target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3
preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=1,VERTICAL_STEP=0
}
fragment_program ESM/GaussianLogFilterV_ps_HLSL hlsl : ESM/GaussianLogFilterH_ps_HLSL
{
preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=0,VERTICAL_STEP=1
}
fragment_program ESM/GaussianLogFilterH_ps_Metal metal
{
source GaussianBlurLogFilter_ps.metal
shader_reflection_pair_hint Ogre/Compositor/Quad_vs
preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=1,VERTICAL_STEP=0
}
fragment_program ESM/GaussianLogFilterV_ps_Metal metal : ESM/GaussianLogFilterH_ps_Metal
{
preprocessor_defines NUM_WEIGHTS=9,K=80,HORIZONTAL_STEP=0,VERTICAL_STEP=1
}
fragment_program ESM/GaussianLogFilterH_ps unified
{
delegate ESM/GaussianLogFilterH_ps_GLSL
delegate ESM/GaussianLogFilterH_ps_VK
delegate ESM/GaussianLogFilterH_ps_HLSL
delegate ESM/GaussianLogFilterH_ps_Metal
default_params
{
param_named weights float9 0.013960189 0.022308320 0.033488754 0.047226712 0.062565230 0.077863686 0.091031872 0.099978946 0.10315263
}
}
fragment_program ESM/GaussianLogFilterV_ps unified
{
delegate ESM/GaussianLogFilterV_ps_GLSL
delegate ESM/GaussianLogFilterV_ps_VK
delegate ESM/GaussianLogFilterV_ps_HLSL
delegate ESM/GaussianLogFilterV_ps_Metal
default_params
{
param_named weights float9 0.013960189 0.022308320 0.033488754 0.047226712 0.062565230 0.077863686 0.091031872 0.099978946 0.10315263
}
}
material ESM/GaussianLogFilterH
{
technique
{
pass
{
depth_check off
depth_write off
cull_hardware none
vertex_program_ref Ogre/Compositor/Quad_vs
{
}
fragment_program_ref ESM/GaussianLogFilterH_ps
{
}
texture_unit tex
{
filtering none
tex_address_mode clamp
}
}
}
}
material ESM/GaussianLogFilterV : ESM/GaussianLogFilterH
{
technique
{
pass
{
fragment_program_ref ESM/GaussianLogFilterV_ps
{
}
}
}
}

+ 101
- 0
test/data/CommonMaterials/Common/EsmGaussianBlurLogFilter.material.json View File

@@ -0,0 +1,101 @@
{
"samplers" :
{
"PointClamp" :
{
"min" : "point",
"mag" : "point",
"mip" : "point",
"u" : "clamp",
"v" : "clamp",
"w" : "clamp",
"miplodbias" : 0,
"max_anisotropic" : 1,
"compare_function" : "disabled",
"border" : [1, 1, 1, 1],
"min_lod" : -3.40282347E+38,
"max_lod" : 3.40282347E+38
}
},
"compute" :
{
"ESM/GaussianLogFilterH" :
{
"threads_per_group" : [32, 2, 1],
"thread_groups" : [8, 512, 1],
"source" : "GaussianBlurLogFilterBase_cs",
"pieces" : ["EsmGaussianBlurLogFilter_cs"],
"inform_shader_of_texture_data_change" : true,
"uav_units" : 1,
"textures" :
[
{
"sampler" : "PointClamp"
}
],
"params" :
[
["g_f4OutputSize", "packed_texture_size", 0],
["c_weights[0]", [0.013960189, 0.022308320, 0.033488754, 0.047226712,
0.062565230, 0.077863686, 0.091031872, 0.099978946, 0.10315263]]
],
"params_glsl" :
[
["inputImage", [0], "int"],
["outputImage", [0], "int"]
],
"properties" :
{
"horizontal_pass" : 1,
"kernel_radius" : 8,
"K" : 80
}
},
"ESM/GaussianLogFilterV" :
{
"threads_per_group" : [32, 2, 1],
"thread_groups" : [512, 8, 1],
"source" : "GaussianBlurLogFilterBase_cs",
"pieces" : ["EsmGaussianBlurLogFilter_cs"],
"inform_shader_of_texture_data_change" : true,
"uav_units" : 1,
"textures" :
[
{
"sampler" : "PointClamp"
}
],
"params" :
[
["g_f4OutputSize", "packed_texture_size", 0],
["c_weights[0]", [0.013960189, 0.022308320, 0.033488754, 0.047226712,
0.062565230, 0.077863686, 0.091031872, 0.099978946, 0.10315263]]
],
"params_glsl" :
[
["inputImage", [0], "int"],
["outputImage", [0], "int"]
],
"properties" :
{
"horizontal_pass" : 0,
"kernel_radius" : 8,
"K" : 80
}
}
}
}

+ 18
- 0
test/data/CommonMaterials/Common/GLSL/Copyback_1xFP32_ps.glsl View File

@@ -0,0 +1,18 @@
#version ogre_glsl_ver_330

vulkan_layout( ogre_t0 ) uniform texture2D tex;
vulkan( layout( ogre_s0 ) uniform sampler texSampler );

vulkan_layout( location = 0 )
in block
{
vec2 uv0;
} inPs;

vulkan_layout( location = 0 )
out float fragColour;

void main()
{
fragColour = texture( vkSampler2D( tex, texSampler ), inPs.uv0 ).x;
}

+ 22
- 0
test/data/CommonMaterials/Common/GLSL/Copyback_4xFP32_2DArray_ps.glsl View File

@@ -0,0 +1,22 @@
#version ogre_glsl_ver_330

vulkan_layout( ogre_t0 ) uniform texture2DArray tex;
vulkan( layout( ogre_s0 ) uniform sampler texSampler );

vulkan( layout( ogre_P0 ) uniform Params { )
uniform float sliceIdx;
vulkan( }; )

vulkan_layout( location = 0 )
in block
{
vec2 uv0;
} inPs;

vulkan_layout( location = 0 )
out vec4 fragColour;

void main()
{
fragColour = texture( vkSampler2DArray( tex, texSampler ), vec3( inPs.uv0, sliceIdx ) );
}

+ 18
- 0
test/data/CommonMaterials/Common/GLSL/Copyback_4xFP32_ps.glsl View File

@@ -0,0 +1,18 @@
#version ogre_glsl_ver_330

// Pixel shader: copies all four channels of a 2D texture into the colour output.

// Source texture and the sampler used to read it.
vulkan_layout( ogre_t0 ) uniform texture2D tex;
vulkan( layout( ogre_s0 ) uniform sampler texSampler );

// Interpolated inputs for this fragment.
vulkan_layout( location = 0 )
in block
{
    vec2 uv0;
} inPs;

vulkan_layout( location = 0 )
out vec4 fragColour;

void main()
{
    vec4 texel = texture( vkSampler2D( tex, texSampler ), inPs.uv0 );
    fragColour = texel;
}

+ 28
- 0
test/data/CommonMaterials/Common/GLSL/CubeToDpm_4xFP16_ps.glsl View File

@@ -0,0 +1,28 @@
#version ogre_glsl_ver_330

// Pixel shader: resamples a cubemap into a side-by-side 2D layout. The left half
// of the target (uv0.x < 0.5) looks along +Z, the right half along -Z.
// (Presumably a dual-paraboloid map, going by the file name - confirm.)

// lodLevel: mip level of the cubemap to read from.
vulkan( layout( ogre_P0 ) uniform Params { )
    uniform float lodLevel;
vulkan( }; )

vulkan_layout( ogre_t0 ) uniform textureCube cubeTexture;
vulkan( layout( ogre_s0 ) uniform sampler cubeSampler );

// Interpolated inputs for this fragment.
vulkan_layout( location = 0 )
in block
{
    vec2 uv0;
} inPs;

vulkan_layout( location = 0 )
out vec4 fragColour;

void main()
{
    // Each half of the target spans [-1; 1] horizontally; vertically the whole target does.
    float dirX = mod( inPs.uv0.x, 0.5 ) * 4.0 - 1.0;
    float dirY = inPs.uv0.y * 2.0 - 1.0;
    float dirZ = 0.5 - 0.5 * (dirX * dirX + dirY * dirY);

    // The right half of the target looks the opposite way.
    if( !(inPs.uv0.x < 0.5) )
        dirZ = -dirZ;

    vec3 cubeDir = vec3( dirX, dirY, dirZ );
    fragColour = textureLod( vkSamplerCube( cubeTexture, cubeSampler ), cubeDir, lodLevel );
}

+ 37
- 0
test/data/CommonMaterials/Common/GLSL/CubeToDpsm_ps.glsl View File

@@ -0,0 +1,37 @@
#version ogre_glsl_ver_330

// Pixel shader: resamples a depth cubemap into a side-by-side 2D layout. The left
// half of the target (uv0.x < 0.5) looks along +Z, the right half along -Z.
// The value is written to gl_FragDepth, or to a colour output when
// OUTPUT_TO_COLOUR is defined.

vulkan_layout( ogre_t0 ) uniform textureCube depthTexture;
vulkan( layout( ogre_s0 ) uniform sampler cubeSampler );

// Interpolated inputs for this fragment.
vulkan_layout( location = 0 )
in block
{
    vec2 uv0;
} inPs;

in vec4 gl_FragCoord;
// gl_FragDepth is used as-is, without an explicit redeclaration.

#ifdef OUTPUT_TO_COLOUR
vulkan_layout( location = 0 )
out float fragColour;
#endif

void main()
{
    // Each half of the target spans [-1; 1] horizontally; vertically the whole target does.
    float dirX = mod( inPs.uv0.x, 0.5 ) * 4.0 - 1.0;
    float dirY = inPs.uv0.y * 2.0 - 1.0;
    float dirZ = 0.5 - 0.5 * (dirX * dirX + dirY * dirY);

    // The right half of the target looks the opposite way.
    if( !(inPs.uv0.x < 0.5) )
        dirZ = -dirZ;

    vec3 cubeDir = vec3( dirX, dirY, dirZ );

    // Always read the top mip of the source.
    float depthValue = textureLod( vkSamplerCube( depthTexture, cubeSampler ), cubeDir, 0 ).x;

#ifdef OUTPUT_TO_COLOUR
    fragColour = depthValue;
#else
    gl_FragDepth = depthValue;
#endif
}

+ 17
- 0
test/data/CommonMaterials/Common/GLSL/DepthDownscaleMax_ps.glsl View File

@@ -0,0 +1,17 @@
#version ogre_glsl_ver_330

// Pixel shader: halves the resolution of a depth texture. Each output fragment
// receives the maximum of the matching 2x2 block of source texels.

vulkan_layout( ogre_t0 ) uniform texture2D depthTexture;

in vec4 gl_FragCoord;
// gl_FragDepth is used as-is, without an explicit redeclaration.

void main()
{
    // Top-left texel of the 2x2 source block covered by this fragment.
    ivec2 blockOrigin = ivec2( gl_FragCoord.xy * 2.0 );

    float depth00 = texelFetch( depthTexture, blockOrigin, 0 ).x;
    float depth01 = texelFetch( depthTexture, blockOrigin + ivec2( 0, 1 ), 0 ).x;
    float depth10 = texelFetch( depthTexture, blockOrigin + ivec2( 1, 0 ), 0 ).x;
    float depth11 = texelFetch( depthTexture, blockOrigin + ivec2( 1, 1 ), 0 ).x;

    float firstColumnMax  = max( depth00, depth01 );
    float secondColumnMax = max( depth10, depth11 );
    gl_FragDepth = max( firstColumnMax, secondColumnMax );
}

+ 22
- 0
test/data/CommonMaterials/Common/GLSL/EsmGaussianBlurLogFilter_cs.glsl View File

@@ -0,0 +1,22 @@
//Based on GPUOpen's samples SeparableFilter11
//https://github.com/GPUOpen-LibrariesAndSDKs/SeparableFilter11
//For better understanding, read "Efficient Compute Shader Programming" from Bill Bilodeau
//http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/Efficient%20Compute%20Shader%20Programming.pps

//TL;DR:
// * Each thread works on 4 pixels at a time (for VLIW hardware, i.e. Radeon HD 5000 & 6000 series).
// * 256 pixels per threadgroup. Each threadgroup works on 2 rows of 128 pixels each.
// That means 32x2 threads = 64. 64 threads x 4 pixels per thread = 256

//This file only defines pieces; they are pasted verbatim wherever the base template
//(GaussianBlurLogFilterBase_cs) inserts a piece of the same name.
//The first three are pasted in the middle of a line or declaration, so they must stay
//free of comments and stray text.

//Working type and shared-memory storage type: a single float channel.
@piece( data_type )float@end
@piece( lds_data_type )float@end
//Shared-memory sample cache: 2 rows of samples_per_threadgroup entries each.
@piece( lds_definition )shared float g_f3LDS[ 2 ] [ @value( samples_per_threadgroup ) ];@end

//Body of sampleTex(): reads the x channel of the input at mip 0 and returns it.
@piece( image_sample )
return textureLod( vkSampler2D( inputImage, inputSampler ), f2SamplePosition, 0 ).x;
@end

//Tail of ComputeFilterKernel(): stores the 4 results of this thread, one texel apart
//along i2Inc. The filtered value goes in x; y and z are 0 and w is 1.
@piece( image_store )
@foreach( 4, iPixel )
imageStore( outputImage, ivec2( i2Center + @iPixel * i2Inc ), vec4( outColour[ @iPixel ], 0, 0, 1.0 ) );@end
@end

+ 263
- 0
test/data/CommonMaterials/Common/GLSL/GaussianBlurBase_cs.glsl View File

@@ -0,0 +1,263 @@
@property( syntax != glslvk )
#version 430
@else
#version 450
@end

//Separable Gaussian blur, compute shader template. One dispatch filters along a single
//axis: X when the horizontal_pass property is set, Y otherwise.
//NOTE: the template preprocessor also scans comments, so keep the 'at' sign out of
//explanatory text unless an expansion is really intended.

//Based on GPUOpen's samples SeparableFilter11
//https://github.com/GPUOpen-LibrariesAndSDKs/SeparableFilter11
//For better understanding, read "Efficient Compute Shader Programming" from Bill Bilodeau
//http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/Efficient%20Compute%20Shader%20Programming.pps

//TL;DR:
// * Each thread works on 4 pixels at a time (for VLIW hardware, i.e. Radeon HD 5000 & 6000 series).
// * 256 pixels per threadgroup. Each threadgroup works on 2 rows of 128 pixels each.
// That means 32x2 threads = 64. 64 threads x 4 pixels per thread = 256

// For this shader to work, several pieces need to be defined:
// data_type (e.g. vec3)
// lds_data_type (e.g. vec3, uint)
// lds_definition
// image_store
// image_sample
// decode_lds (optional, e.g. when lds_data_type != data_type)
// Define the property "downscale" if you're doing a downsample.
// Define "downscale_lq" (must also define downscale) for SLIGHTLY lower quality downscale
// The script uses the template syntax to automatically set the num. of threadgroups
// based on the bound input texture.

//Input texture and its sampler. The texture is declared as an array when
//texture0_texture_type says it is one.
vulkan( layout( ogre_s0 ) uniform sampler inputSampler );
@property( texture0_texture_type == TextureTypes_Type2DArray )
vulkan_layout( ogre_t0 ) uniform texture2DArray inputImage;
@else
vulkan_layout( ogre_t0 ) uniform texture2D inputImage;
@end

//Write-only output image. Its format qualifier is supplied by the uav0_pf_type piece,
//and it is declared as an array when uav0_texture_type says it is one.
layout( vulkan( ogre_u0 ) vk_comma @insertpiece(uav0_pf_type) )
@property( uav0_texture_type == TextureTypes_Type2DArray )
uniform restrict writeonly image2DArray outputImage;
@else
uniform restrict writeonly image2D outputImage;
@end

// 32 = 128 / 4
layout( local_size_x = 32,
local_size_y = 2,
local_size_z = 1 ) in;
//Same group size as the layout above, published as template properties.
//(Presumably read back by the host to configure the dispatch - confirm.)
@pset( threads_per_group_x, 32 )
@pset( threads_per_group_y, 2 )
@pset( threads_per_group_z, 1 )

//pixelsPerRow = 32 threads * 4 pixels = 128; rowsPerThreadGroup = 2.
@pmul( pixelsPerRow, threads_per_group_x, 4 )
@pset( rowsPerThreadGroup, threads_per_group_y )
@pset( num_thread_groups_z, 1 )

//The dispatch size is derived from the size of the output (uav0) at the bound mip.
@set( input_width, uav0_width_with_lod )
@set( input_height, uav0_height_with_lod )

@property( horizontal_pass )
@property( downscale ) @mul( input_width, 2 ) @end

/// Calculate num_thread_groups_ (rounded-up division)
/// num_thread_groups_x = (input_width + pixelsPerRow - 1) / pixelsPerRow
/// num_thread_groups_y = (input_height + rowsPerThreadGroup - 1) / rowsPerThreadGroup
@add( num_thread_groups_x, input_width, pixelsPerRow )
@sub( num_thread_groups_x, 1 )
@div( num_thread_groups_x, pixelsPerRow )

@add( num_thread_groups_y, input_height, rowsPerThreadGroup )
@sub( num_thread_groups_y, 1 )
@div( num_thread_groups_y, rowsPerThreadGroup )
@end @property( !horizontal_pass )
@property( downscale ) @mul( input_height, 2 ) @end

/// Calculate num_thread_groups_ (rounded-up division, axes swapped w.r.t. the horizontal pass)
/// num_thread_groups_x = (input_width + rowsPerThreadGroup - 1) / rowsPerThreadGroup
/// num_thread_groups_y = (input_height + pixelsPerRow - 1) / pixelsPerRow
@add( num_thread_groups_x, input_width, rowsPerThreadGroup )
@sub( num_thread_groups_x, 1 )
@div( num_thread_groups_x, rowsPerThreadGroup )

@add( num_thread_groups_y, input_height, pixelsPerRow )
@sub( num_thread_groups_y, 1 )
@div( num_thread_groups_y, pixelsPerRow )
@end

/// shared vec3 g_f3LDS[ 2 ] [ @value( samples_per_threadgroup ) ];
@insertpiece( lds_definition )

//Weights are packed four per vec4: x >> 2 selects the vec4, x & 3 the component.
#define C_WEIGHTS( x ) c_weights[(x) >> 2u][(x) & 3u]

// weights_array_count = ( kernel_radius + 1u + 3u ) / 4u
@padd( weights_array_count, kernel_radius, 4 )
@pdiv( weights_array_count, 4 )

//g_f4OutputSize: xy is compared against pixel coordinates in main(); zw scales pixel
//coordinates before sampling in sampleTex().
//c_weights: kernel_radius + 1 filter weights; index kernel_radius is the centre tap.
vulkan( layout( ogre_P0 ) uniform Params { )
uniform vec4 g_f4OutputSize;
uniform vec4 c_weights[@value( weights_array_count )];
@insertpiece( extra_params )
vulkan( }; )

//Reads one input sample for the pixel at integer position i2Position and returns it
//in the shared-memory storage type (lds_data_type).
//The position is moved to the texel centre (+0.5) and multiplied by g_f4OutputSize.zw
//(presumably 1/width and 1/height, giving normalized UVs - TODO confirm).
//The image_sample piece then performs the fetch and the return.
//NOTE(review): f2Offset is not referenced by the code visible here; whether the
//inserted piece uses it depends on the piece file.
@insertpiece( lds_data_type ) sampleTex( ivec2 i2Position , vec2 f2Offset )
{
vec2 f2SamplePosition = vec2( i2Position ) + vec2( 0.5f, 0.5f );

f2SamplePosition *= g_f4OutputSize.zw;
///return textureLod( inputImage, f2SamplePosition, 0 ).xyz;
@insertpiece( image_sample )
}

//Applies the 1D kernel to the 4 consecutive pixels owned by this thread, reading the
//samples previously cached in g_f3LDS, then hands the results to the image_store piece.
// iPixelOffset: index into the g_f3LDS row of the leftmost tap of this thread's first pixel.
// iLineOffset:  which of the 2 cached rows to read.
// i2Center:     output coordinate of the first pixel (used by the image_store piece).
// i2Inc:        step between consecutive output pixels (used by the image_store piece).
//With downscale_lq only every other pixel (0 and 2) is accumulated, giving 2 results.
//All loops below are unrolled by the template preprocessor.
void ComputeFilterKernel( int iPixelOffset, int iLineOffset, ivec2 i2Center, ivec2 i2Inc )
{
@property( !downscale_lq )
@insertpiece( data_type ) outColour[ 4 ];
@end @property( downscale_lq )
@insertpiece( data_type ) outColour[ 2 ];
@end
//RDI is a 4-wide sliding window over the cached row.
@insertpiece( data_type ) RDI[ 4 ] ;

//Centre taps: the sample kernel_radius entries to the right of the leftmost tap.
@foreach( 4, iPixel )
RDI[ @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @value( kernel_radius ) + @iPixel ] );@end

//Start the accumulators with the centre sample times the centre weight.
@property( !downscale_lq )
@foreach( 4, iPixel )
outColour[ @iPixel ] = RDI[ @iPixel ] * C_WEIGHTS( @value( kernel_radius ) );@end
@end @property( downscale_lq )
@foreach( 2, iPixel )
outColour[ @iPixel ] = RDI[ @iPixel * 2 ] * C_WEIGHTS( @value( kernel_radius ) );@end
@end

//Reload the window with the leftmost tap of each pixel.
@foreach( 4, iPixel )
RDI[ @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @iPixel ] );@end

//From here on iPixelOffset points at the next sample to slide into the window.
iPixelOffset += 4;

/// Deal with taps to our left.
/// for ( iIteration = 0; iIteration < radius; iIteration += 1 )
/// Each iteration accumulates the window, shifts it left by one and loads one new sample.
@foreach( kernel_radius, iIteration )
@property( !downscale_lq )
@foreach( 4, iPixel )
outColour[ @iPixel ] += RDI[ @iPixel ] * C_WEIGHTS( @iIteration );@end
@end @property( downscale_lq )
@foreach( 2, iPixel )
outColour[ @iPixel ] += RDI[ @iPixel * 2 ] * C_WEIGHTS( @iIteration );@end
@end
@foreach( 3, iPixel )
RDI[ @iPixel ] = RDI[ @iPixel + ( 1 ) ];@end
@foreach( 1, iPixel )
RDI[ 4 - 1 + @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @iIteration + @iPixel ] );@end
@end

//Reload the window with the first tap to the right of each pixel's centre.
@foreach( 4, iPixel )
RDI[ @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset - 4 + @value( kernel_radius ) + 1 + @iPixel ] );@end

//Helper properties: radius + 1, 2 * radius + 1 and 2 * radius.
//kernel_radius2x_plus1 is also used in main().
@padd( kernel_radius_plus1, kernel_radius, 1 )
@pmul( kernel_radius2x_plus1, kernel_radius, 2 )
@padd( kernel_radius2x_plus1, 1 )

@pmul( kernel_radius2x, kernel_radius, 2 )

/// Deal with taps to our right.
/// for ( iIteration = radius + 1; iIteration < ( radius * 2 + 1 ); iIteration += 1 )
/// The weight index 2 * radius - iIteration mirrors the left-side weights.
@foreach( kernel_radius2x_plus1, iIteration, kernel_radius_plus1 )
@property( !downscale_lq )
@foreach( 4, iPixel )
outColour[ @iPixel ] += RDI[ @iPixel ] * C_WEIGHTS( @value( kernel_radius2x ) - @iIteration );@end
@end @property( downscale_lq )
@foreach( 2, iPixel )
outColour[ @iPixel ] += RDI[ @iPixel * 2 ] * C_WEIGHTS( @value( kernel_radius2x ) - @iIteration );@end
@end
@foreach( 3, iPixel )
RDI[ @iPixel ] = RDI[ @iPixel + ( 1 ) ];@end
@foreach( 1, iPixel )
RDI[ 4 - 1 + @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @iIteration + @iPixel ] );@end
@end

/*
foreach( 4, iPixel )
imageStore( outputImage, ivec2( i2Center + iPixel * i2Inc ), vec4( outColour[ iPixel ], 1.0 ) );end
*/
@insertpiece( image_store )
}

//Entry point. Works in two phases separated by a barrier:
// 1. The thread group cooperatively copies the samples it needs into g_f3LDS.
//    That is 128 pixels plus kernel_radius extra samples on each side, for each of its 2 rows.
// 2. Each thread filters 4 consecutive pixels out of that cache via ComputeFilterKernel().
//The horizontal and vertical passes are the same code with the X and Y roles swapped.
void main()
{
/// samples_per_threadgroup = 128 + ( ( kernel_radius * 2 + 1 ) - 1 )
/// samples_per_thread = ( 128 + ( ( kernel_radius * 2 + 1 ) - 1 ) ) / ( 128 / 4 )
@padd( samples_per_threadgroup, 127, kernel_radius2x_plus1 )
@pdiv( samples_per_thread, samples_per_threadgroup, 32 )

@property( horizontal_pass )
//First cache slot written by this thread, and the cached row it works on.
int iSampleOffset = int( gl_LocalInvocationID.x * @value( samples_per_thread ) );
int iLineOffset = int( gl_LocalInvocationID.y );

//Pixel matching cache slot 0: the group covers 128 pixels in X (<< 7) and 2 rows in Y (<< 1),
//and starts kernel_radius pixels early to include the left-side taps.
ivec2 i2GroupCoord = ivec2( ( gl_WorkGroupID.x << 7u ) - @value( kernel_radius )u, gl_WorkGroupID.y << 1u );
ivec2 i2Coord = ivec2( i2GroupCoord.x + iSampleOffset, i2GroupCoord.y );

@foreach( samples_per_thread, i )
g_f3LDS[ iLineOffset ][ iSampleOffset + @i ] = sampleTex( i2Coord + ivec2( @i, gl_LocalInvocationID.y ) , vec2( 0.5f, 0.0f ) );@end

//samples_per_threadgroup is not necessarily a multiple of 32. The leftover samples are
//loaded by the first few threads, filling the cache row from its end backwards.
if( gl_LocalInvocationID.x < @value( samples_per_threadgroup )u - 32u * @value( samples_per_thread )u )
{
g_f3LDS[ iLineOffset ][ @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x ] =
sampleTex( i2GroupCoord + ivec2( @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x, gl_LocalInvocationID.y ), vec2( 0.5f, 0.0f ) );
}

//memoryBarrierShared ensures our write is visible to everyone else (must be done BEFORE the barrier)
//barrier ensures every thread's execution reached here.
memoryBarrierShared();
barrier();

int iPixelOffset = int( gl_LocalInvocationID.x << 2u ); //gl_LocalInvocationID.x * 4u
i2Coord = ivec2( i2GroupCoord.x + iPixelOffset, i2GroupCoord.y );
//Undo the kernel_radius shift: i2Coord is now the first output pixel of this thread.
i2Coord.x += @value( kernel_radius );

//Threads past the right edge skip the filter. This test comes after the barrier, so every
//thread of the group still takes part in the load and the barrier above.
if( i2Coord.x < int(g_f4OutputSize.x) )
{
ivec2 i2Center = i2Coord + ivec2( 0, gl_LocalInvocationID.y );
ivec2 i2Inc = ivec2 ( 1, 0 );

@property( downscale )
i2Center.x = int( uint( i2Center.x ) >> 1u );
@end

ComputeFilterKernel( iPixelOffset, iLineOffset, i2Center, i2Inc );
}
@end @property( !horizontal_pass )
//First cache slot written by this thread, and the cached line it works on.
int iSampleOffset = int( gl_LocalInvocationID.x * @value( samples_per_thread ) );
int iLineOffset = int( gl_LocalInvocationID.y );

//Pixel matching cache slot 0: the group covers 2 columns in X (<< 1) and 128 pixels in Y (<< 7),
//and starts kernel_radius pixels early to include the taps above.
ivec2 i2GroupCoord = ivec2( gl_WorkGroupID.x << 1u, ( gl_WorkGroupID.y << 7u ) - @value( kernel_radius )u );
ivec2 i2Coord = ivec2( i2GroupCoord.x, i2GroupCoord.y + iSampleOffset );

@foreach( samples_per_thread, i )
g_f3LDS[ iLineOffset ][ iSampleOffset + @i ] = sampleTex( i2Coord + ivec2( gl_LocalInvocationID.y, @i ) , vec2( 0.0f, 0.5f ) );@end

//Leftover samples, as in the horizontal pass.
if( gl_LocalInvocationID.x < @value( samples_per_threadgroup )u - 32u * @value( samples_per_thread )u )
{
g_f3LDS[ iLineOffset ][ @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x ] =
sampleTex( i2GroupCoord + ivec2( gl_LocalInvocationID.y, @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x ), vec2( 0.0f, 0.5f ) );
}

//memoryBarrierShared ensures our write is visible to everyone else (must be done BEFORE the barrier)
//barrier ensures every thread's execution reached here.
memoryBarrierShared();
barrier();

int iPixelOffset = int( gl_LocalInvocationID.x << 2u ); //gl_LocalInvocationID.x * 4u
i2Coord = ivec2( i2GroupCoord.x, i2GroupCoord.y + iPixelOffset );
//Undo the kernel_radius shift: i2Coord is now the first output pixel of this thread.
i2Coord.y += @value( kernel_radius );

//Threads past the bottom edge skip the filter (test placed after the barrier, see above).
if( i2Coord.y < int(g_f4OutputSize.y) )
{
ivec2 i2Center = i2Coord + ivec2( gl_LocalInvocationID.y, 0 );
ivec2 i2Inc = ivec2 ( 0, 1 );

@property( downscale )
i2Center.y = int( uint( i2Center.y ) >> 1u );
@end

ComputeFilterKernel( iPixelOffset, iLineOffset, i2Center, i2Inc );
}
@end
}

+ 285
- 0
test/data/CommonMaterials/Common/GLSL/GaussianBlurLogFilterBase_cs.glsl View File

@@ -0,0 +1,285 @@
@property( syntax != glslvk )
#version 430
@else
#version 450
@end

//NOTE: the template preprocessor also scans comments, so keep the 'at' sign out of
//explanatory text unless an expansion is really intended.

//See GaussianBlurBase_cs for the original.
//This is a derived version which is used for filtering ESM (Exponential Shadow Maps).
//Normally ESM is in exponential space: exp( K * linearSpaceDepth );
//Filtering should be done in that space.
//However because of precision reasons, we store linearSpaceDepth instead. In order to perform
//correct filtering, we use the following formula:
// exp( filteredDepth ) = w0 * exp( d0 ) + w1 * exp( d1 ) + w2 * exp( d2 ) + ...
//
//But this is not precision friendly. So we do instead:
// = w0 * exp( d0 ) + w1 * exp( d1 ) + w2 * exp( d2 )
// = exp( d0 ) * ( w0 + w1 * exp( d1 ) / exp( d0 ) + w2 * exp( d2 ) / exp( d0 ) )
// = exp( d0 ) * ( w0 + w1 * exp( d1 - d0 ) + w2 * exp( d2 - d0 ) )
// = exp( d0 ) * exp( log( w0 + w1 * exp( d1 - d0 ) + w2 * exp( d2 - d0 ) ) )
// = exp( d0 + log( w0 + w1 * exp( d1 - d0 ) + w2 * exp( d2 - d0 ) ) )
// exp( filteredDepth ) = exp( d0 + log( w0 + w1 * exp( d1 - d0 ) + w2 * exp( d2 - d0 ) ) )
//Almost final formula:
// filteredDepth = d0 + log( w0 + w1 * exp( d1 - d0 ) + w2 * exp( d2 - d0 ) )
//
//The formula is actually:
// exp( K * filteredDepth ) = w0 * exp( K * d0 ) + w1 * exp( K * d1 ) + w2 * exp( K * d2 ) + ...
//Final formula:
// filteredDepth = d0 + log( w0 + w1 * exp( K * (d1 - d0) ) + w2 * exp( K * (d2 - d0) ) ) / K
//In ComputeFilterKernel below, d0 is the centre sample of each pixel (firstSmpl) and w0 its weight.

//Like in the original filter:
// * Each thread works on 4 pixels at a time (for VLIW hardware, i.e. Radeon HD 5000 & 6000 series).
// * 256 pixels per threadgroup. Each threadgroup works on 2 rows of 128 pixels each.
// That means 32x2 threads = 64. 64 threads x 4 pixels per thread = 256

// For this shader to work, several pieces need to be defined:
// data_type (e.g. vec3)
// lds_data_type (e.g. vec3, uint)
// lds_definition
// image_store
// image_sample
// decode_lds (optional, e.g. when lds_data_type != data_type)
// Define the property "downscale" if you're doing a downsample.
// Define "downscale_lq" (must also define downscale) for SLIGHTLY lower quality downscale
// The script uses the template syntax to automatically set the num. of threadgroups
// based on the bound input texture.
// The property "K" (the ESM exponent constant) must be defined as well.

//Input texture and its sampler.
vulkan( layout( ogre_s0 ) uniform sampler inputSampler );
vulkan_layout( ogre_t0 ) uniform texture2D inputImage;

//Write-only output image. Its format qualifier is supplied by the uav0_pf_type piece.
layout( vulkan( ogre_u0 ) vk_comma @insertpiece(uav0_pf_type) )
uniform restrict writeonly image2D outputImage;

// 32 = 128 / 4
layout( local_size_x = 32,
local_size_y = 2,
local_size_z = 1 ) in;
//Same group size as the layout above, published as template properties.
//(Presumably read back by the host to configure the dispatch - confirm.)
@pset( threads_per_group_x, 32 )
@pset( threads_per_group_y, 2 )
@pset( threads_per_group_z, 1 )

//pixelsPerRow = 32 threads * 4 pixels = 128; rowsPerThreadGroup = 2.
@pmul( pixelsPerRow, threads_per_group_x, 4 )
@pset( rowsPerThreadGroup, threads_per_group_y )
@pset( num_thread_groups_z, 1 )

//The dispatch size is derived from the size of the output (uav0) at the bound mip.
@set( input_width, uav0_width_with_lod )
@set( input_height, uav0_height_with_lod )

@property( horizontal_pass )
@property( downscale ) @mul( input_width, 2 ) @end

/// Calculate num_thread_groups_ (rounded-up division)
/// num_thread_groups_x = (input_width + pixelsPerRow - 1) / pixelsPerRow
/// num_thread_groups_y = (input_height + rowsPerThreadGroup - 1) / rowsPerThreadGroup
@add( num_thread_groups_x, input_width, pixelsPerRow )
@sub( num_thread_groups_x, 1 )
@div( num_thread_groups_x, pixelsPerRow )

@add( num_thread_groups_y, input_height, rowsPerThreadGroup )
@sub( num_thread_groups_y, 1 )
@div( num_thread_groups_y, rowsPerThreadGroup )
@end @property( !horizontal_pass )
@property( downscale ) @mul( input_height, 2 ) @end

/// Calculate num_thread_groups_ (rounded-up division, axes swapped w.r.t. the horizontal pass)
/// num_thread_groups_x = (input_width + rowsPerThreadGroup - 1) / rowsPerThreadGroup
/// num_thread_groups_y = (input_height + pixelsPerRow - 1) / pixelsPerRow
@add( num_thread_groups_x, input_width, rowsPerThreadGroup )
@sub( num_thread_groups_x, 1 )
@div( num_thread_groups_x, rowsPerThreadGroup )

@add( num_thread_groups_y, input_height, pixelsPerRow )
@sub( num_thread_groups_y, 1 )
@div( num_thread_groups_y, pixelsPerRow )
@end

/// shared vec3 g_f3LDS[ 2 ] [ @value( samples_per_threadgroup ) ];
@insertpiece( lds_definition )

//Weights are packed four per vec4: x >> 2 selects the vec4, x & 3 the component.
#define C_WEIGHTS( x ) c_weights[(x) >> 2u][(x) & 3u]

// weights_array_count = ( kernel_radius + 1u + 3u ) / 4u
@padd( weights_array_count, kernel_radius, 4 )
@pdiv( weights_array_count, 4 )

//g_f4OutputSize: xy is compared against pixel coordinates in main(); zw scales pixel
//coordinates before sampling in sampleTex().
//c_weights: kernel_radius + 1 filter weights; index kernel_radius is the centre tap.
vulkan( layout( ogre_P0 ) uniform Params { )
uniform vec4 g_f4OutputSize;
uniform vec4 c_weights[@value( weights_array_count )];
@insertpiece( extra_params )
vulkan( }; )

//Reads one input sample for the pixel at integer position i2Position and returns it
//in the shared-memory storage type (lds_data_type).
//The position is moved to the texel centre (+0.5) and multiplied by g_f4OutputSize.zw
//(presumably 1/width and 1/height, giving normalized UVs - TODO confirm).
//The image_sample piece then performs the fetch and the return.
//NOTE(review): f2Offset is not referenced by the code visible here; whether the
//inserted piece uses it depends on the piece file.
@insertpiece( lds_data_type ) sampleTex( ivec2 i2Position , vec2 f2Offset )
{
vec2 f2SamplePosition = vec2( i2Position ) + vec2( 0.5f, 0.5f );

f2SamplePosition *= g_f4OutputSize.zw;
///return textureLod( inputImage, f2SamplePosition, 0 ).xyz;
@insertpiece( image_sample )
}

//Log-space version of the 1D kernel for the 4 consecutive pixels owned by this thread.
//For each pixel it evaluates
//    firstSmpl + log( wCentre + sum( w_i * exp( K * (d_i - firstSmpl) ) ) ) / K
//where firstSmpl is the pixel's centre sample and d_i are its neighbours, read from the
//samples previously cached in g_f3LDS. Results go to the image_store piece.
// iPixelOffset: index into the g_f3LDS row of the leftmost tap of this thread's first pixel.
// iLineOffset:  which of the 2 cached rows to read.
// i2Center:     output coordinate of the first pixel (used by the image_store piece).
// i2Inc:        step between consecutive output pixels (used by the image_store piece).
//With downscale_lq only every other pixel (0 and 2) is accumulated, giving 2 results.
//All loops below are unrolled by the template preprocessor.
void ComputeFilterKernel( int iPixelOffset, int iLineOffset, ivec2 i2Center, ivec2 i2Inc )
{
@property( !downscale_lq )
@insertpiece( data_type ) outColour[ 4 ];
@insertpiece( data_type ) firstSmpl[ 4 ];
@end @property( downscale_lq )
@insertpiece( data_type ) outColour[ 2 ];
@insertpiece( data_type ) firstSmpl[ 4 ];
@end
//RDI is a 4-wide sliding window over the cached row.
@insertpiece( data_type ) RDI[ 4 ] ;

//Centre taps: the sample kernel_radius entries to the right of the leftmost tap.
@foreach( 4, iPixel )
RDI[ @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @value( kernel_radius ) + @iPixel ] );@end

//Remember the centre sample as the reference depth. The accumulator starts at the bare
//centre weight because exp( K * (centre - centre) ) is 1.
@property( !downscale_lq )
@foreach( 4, iPixel )
firstSmpl[ @iPixel ].x = RDI[ @iPixel ];
outColour[ @iPixel ].x = C_WEIGHTS( @value( kernel_radius ) );@end
@end @property( downscale_lq )
@foreach( 2, iPixel )
firstSmpl[ @iPixel ].x = RDI[ @iPixel * 2 ];
outColour[ @iPixel ].x = C_WEIGHTS( @value( kernel_radius ) );@end
@end

//Reload the window with the leftmost tap of each pixel.
@foreach( 4, iPixel )
RDI[ @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @iPixel ] );@end

//From here on iPixelOffset points at the next sample to slide into the window.
iPixelOffset += 4;

/// Deal with taps to our left.
/// for ( iIteration = 0; iIteration < radius; iIteration += 1 )
/// Each iteration accumulates the window, shifts it left by one and loads one new sample.
@foreach( kernel_radius, iIteration )
@property( !downscale_lq )
@foreach( 4, iPixel )
outColour[ @iPixel ].x += exp(@value(K)*(RDI[ @iPixel ] - firstSmpl[ @iPixel ].x)) * C_WEIGHTS( @iIteration );@end
@end @property( downscale_lq )
@foreach( 2, iPixel )
outColour[ @iPixel ].x += exp(@value(K)*(RDI[ @iPixel * 2 ] - firstSmpl[ @iPixel ].x)) * C_WEIGHTS( @iIteration );@end
@end
@foreach( 3, iPixel )
RDI[ @iPixel ] = RDI[ @iPixel + ( 1 ) ];@end
@foreach( 1, iPixel )
RDI[ 4 - 1 + @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @iIteration + @iPixel ] );@end
@end

//Reload the window with the first tap to the right of each pixel's centre.
@foreach( 4, iPixel )
RDI[ @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset - 4 + @value( kernel_radius ) + 1 + @iPixel ] );@end

//Helper properties: radius + 1, 2 * radius + 1 and 2 * radius.
//kernel_radius2x_plus1 is also used in main().
@padd( kernel_radius_plus1, kernel_radius, 1 )
@pmul( kernel_radius2x_plus1, kernel_radius, 2 )
@padd( kernel_radius2x_plus1, 1 )

@pmul( kernel_radius2x, kernel_radius, 2 )

/// Deal with taps to our right.
/// for ( iIteration = radius + 1; iIteration < ( radius * 2 + 1 ); iIteration += 1 )
/// The weight index 2 * radius - iIteration mirrors the left-side weights.
@foreach( kernel_radius2x_plus1, iIteration, kernel_radius_plus1 )
@property( !downscale_lq )
@foreach( 4, iPixel )
outColour[ @iPixel ].x += exp(@value(K)*(RDI[ @iPixel ] - firstSmpl[ @iPixel ].x)) * C_WEIGHTS( @value( kernel_radius2x ) - @iIteration );@end
@end @property( downscale_lq )
@foreach( 2, iPixel )
outColour[ @iPixel ].x += exp(@value(K)*(RDI[ @iPixel * 2 ] - firstSmpl[ @iPixel ].x)) * C_WEIGHTS( @value( kernel_radius2x ) - @iIteration );@end
@end
@foreach( 3, iPixel )
RDI[ @iPixel ] = RDI[ @iPixel + ( 1 ) ];@end
@foreach( 1, iPixel )
RDI[ 4 - 1 + @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @iIteration + @iPixel ] );@end
@end

//Back from the weighted sum of exponentials to a linear depth: reference + log( sum ) / K.
@property( !downscale_lq )
@foreach( 4, iPixel )
outColour[ @iPixel ] = firstSmpl[ @iPixel ].x + log( outColour[ @iPixel ].x ) / @value(K);@end
@end @property( downscale_lq )
@foreach( 2, iPixel )
outColour[ @iPixel ] = firstSmpl[ @iPixel ].x + log( outColour[ @iPixel ].x ) / @value(K);@end
@end

/*
foreach( 4, iPixel )
imageStore( outputImage, ivec2( i2Center + iPixel * i2Inc ), vec4( outColour[ iPixel ], 1.0 ) );end
*/
@insertpiece( image_store )
}

//Entry point. Works in two phases separated by a barrier:
// 1. The thread group cooperatively copies the samples it needs into g_f3LDS.
//    That is 128 pixels plus kernel_radius extra samples on each side, for each of its 2 rows.
// 2. Each thread filters 4 consecutive pixels out of that cache via ComputeFilterKernel().
//The horizontal and vertical passes are the same code with the X and Y roles swapped.
void main()
{
/// samples_per_threadgroup = 128 + ( ( kernel_radius * 2 + 1 ) - 1 )
/// samples_per_thread = ( 128 + ( ( kernel_radius * 2 + 1 ) - 1 ) ) / ( 128 / 4 )
@padd( samples_per_threadgroup, 127, kernel_radius2x_plus1 )
@pdiv( samples_per_thread, samples_per_threadgroup, 32 )

@property( horizontal_pass )
//First cache slot written by this thread, and the cached row it works on.
int iSampleOffset = int( gl_LocalInvocationID.x * @value( samples_per_thread ) );
int iLineOffset = int( gl_LocalInvocationID.y );

//Pixel matching cache slot 0: the group covers 128 pixels in X (<< 7) and 2 rows in Y (<< 1),
//and starts kernel_radius pixels early to include the left-side taps.
ivec2 i2GroupCoord = ivec2( ( gl_WorkGroupID.x << 7u ) - @value( kernel_radius )u, gl_WorkGroupID.y << 1u );
ivec2 i2Coord = ivec2( i2GroupCoord.x + iSampleOffset, i2GroupCoord.y );

@foreach( samples_per_thread, i )
g_f3LDS[ iLineOffset ][ iSampleOffset + @i ] = sampleTex( i2Coord + ivec2( @i, gl_LocalInvocationID.y ) , vec2( 0.5f, 0.0f ) );@end

//samples_per_threadgroup is not necessarily a multiple of 32. The leftover samples are
//loaded by the first few threads, filling the cache row from its end backwards.
if( gl_LocalInvocationID.x < @value( samples_per_threadgroup )u - 32u * @value( samples_per_thread )u )
{
g_f3LDS[ iLineOffset ][ @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x ] =
sampleTex( i2GroupCoord + ivec2( @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x, gl_LocalInvocationID.y ), vec2( 0.5f, 0.0f ) );
}

//memoryBarrierShared ensures our write is visible to everyone else (must be done BEFORE the barrier)
//barrier ensures every thread's execution reached here.
memoryBarrierShared();
barrier();

int iPixelOffset = int( gl_LocalInvocationID.x << 2u ); //gl_LocalInvocationID.x * 4u
i2Coord = ivec2( i2GroupCoord.x + iPixelOffset, i2GroupCoord.y );
//Undo the kernel_radius shift: i2Coord is now the first output pixel of this thread.
i2Coord.x += @value( kernel_radius );

//Threads past the right edge skip the filter. This test comes after the barrier, so every
//thread of the group still takes part in the load and the barrier above.
if( i2Coord.x < int(g_f4OutputSize.x) )
{
ivec2 i2Center = i2Coord + ivec2( 0, gl_LocalInvocationID.y );
ivec2 i2Inc = ivec2 ( 1, 0 );

@property( downscale )
i2Center.x = int( uint( i2Center.x ) >> 1u );
@end

ComputeFilterKernel( iPixelOffset, iLineOffset, i2Center, i2Inc );
}
@end @property( !horizontal_pass )
//First cache slot written by this thread, and the cached line it works on.
int iSampleOffset = int( gl_LocalInvocationID.x * @value( samples_per_thread ) );
int iLineOffset = int( gl_LocalInvocationID.y );

//Pixel matching cache slot 0: the group covers 2 columns in X (<< 1) and 128 pixels in Y (<< 7),
//and starts kernel_radius pixels early to include the taps above.
ivec2 i2GroupCoord = ivec2( gl_WorkGroupID.x << 1u, ( gl_WorkGroupID.y << 7u ) - @value( kernel_radius )u );
ivec2 i2Coord = ivec2( i2GroupCoord.x, i2GroupCoord.y + iSampleOffset );

@foreach( samples_per_thread, i )
g_f3LDS[ iLineOffset ][ iSampleOffset + @i ] = sampleTex( i2Coord + ivec2( gl_LocalInvocationID.y, @i ) , vec2( 0.0f, 0.5f ) );@end

//Leftover samples, as in the horizontal pass.
if( gl_LocalInvocationID.x < @value( samples_per_threadgroup )u - 32u * @value( samples_per_thread )u )
{
g_f3LDS[ iLineOffset ][ @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x ] =
sampleTex( i2GroupCoord + ivec2( gl_LocalInvocationID.y, @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x ), vec2( 0.0f, 0.5f ) );
}

//memoryBarrierShared ensures our write is visible to everyone else (must be done BEFORE the barrier)
//barrier ensures every thread's execution reached here.
memoryBarrierShared();
barrier();

int iPixelOffset = int( gl_LocalInvocationID.x << 2u ); //gl_LocalInvocationID.x * 4u
i2Coord = ivec2( i2GroupCoord.x, i2GroupCoord.y + iPixelOffset );
//Undo the kernel_radius shift: i2Coord is now the first output pixel of this thread.
i2Coord.y += @value( kernel_radius );

//Threads past the bottom edge skip the filter (test placed after the barrier, see above).
if( i2Coord.y < int(g_f4OutputSize.y) )
{
ivec2 i2Center = i2Coord + ivec2( gl_LocalInvocationID.y, 0 );
ivec2 i2Inc = ivec2 ( 0, 1 );

@property( downscale )
i2Center.y = int( uint( i2Center.y ) >> 1u );
@end

ComputeFilterKernel( iPixelOffset, iLineOffset, i2Center, i2Inc );
}
@end
}

+ 49
- 0
test/data/CommonMaterials/Common/GLSL/GaussianBlurLogFilter_ps.glsl View File

@@ -0,0 +1,49 @@
#version ogre_glsl_ver_330

// Pixel shader: one pass of a separable, symmetric blur evaluated in log space:
//     result = ref + log( sum( w_i * exp( K * (d_i - ref) ) ) ) / K
// where ref is the farthest tap on the negative side. The axis is chosen through
// HORIZONTAL_STEP / VERTICAL_STEP. NUM_WEIGHTS, K and both steps are expected to be
// defined from outside this file.

vulkan_layout( ogre_t0 ) uniform texture2D tex;

// Interpolated inputs for this fragment (not read by main below).
vulkan_layout( location = 0 )
in block
{
    vec2 uv0;
} inPs;

vulkan_layout( location = 0 )
out float fragColour;

in vec4 gl_FragCoord;

// weights[0] belongs to the outermost taps, weights[NUM_WEIGHTS - 1] to the centre.
vulkan( layout( ogre_P0 ) uniform Params { )
    uniform float weights[NUM_WEIGHTS];
vulkan( }; )

void main()
{
    ivec2 centre = ivec2( gl_FragCoord.xy );

    // Outermost tap on the negative side; every exponent below is taken relative to it.
    float firstSmpl = texelFetch( tex, centre - ivec2( HORIZONTAL_STEP * (NUM_WEIGHTS - 1),
                                                       VERTICAL_STEP * (NUM_WEIGHTS - 1) ), 0 ).x;
    // exp( K * 0 ) is 1, so the reference tap contributes its bare weight.
    float outColour = weights[0];

    // Remaining negative-side taps, walking from the outside towards the centre.
    for( int tap = NUM_WEIGHTS - 2; tap > 0; --tap )
    {
        float negSample = texelFetch( tex, centre - ivec2( HORIZONTAL_STEP * tap,
                                                           VERTICAL_STEP * tap ), 0 ).x;
        outColour += exp( K * (negSample - firstSmpl) ) * weights[NUM_WEIGHTS - tap - 1];
    }

    // Centre tap.
    float centreSample = texelFetch( tex, centre, 0 ).x;
    outColour += exp( K * (centreSample - firstSmpl) ) * weights[NUM_WEIGHTS - 1];

    // Positive-side taps, walking from the centre outwards.
    for( int tap = 1; tap < NUM_WEIGHTS; ++tap )
    {
        float posSample = texelFetch( tex, centre + ivec2( HORIZONTAL_STEP * tap,
                                                           VERTICAL_STEP * tap ), 0 ).x;
        outColour += exp( K * (posSample - firstSmpl) ) * weights[NUM_WEIGHTS - tap - 1];
    }

    fragColour = firstSmpl + log( outColour ) / K;
}

+ 9
- 0
test/data/CommonMaterials/Common/GLSL/HiddenAreaMeshVr_ps.glsl View File

@@ -0,0 +1,9 @@
#version ogre_glsl_ver_330

// Pixel shader: writes zero to every channel of the colour output.

vulkan_layout( location = 0 )
out vec4 fragColour;

void main()
{
    fragColour = vec4( 0.0, 0.0, 0.0, 0.0 );
}

+ 31
- 0
test/data/CommonMaterials/Common/GLSL/HiddenAreaMeshVr_vs.glsl View File

@@ -0,0 +1,31 @@
#version ogre_glsl_ver_330

// Vertex shader: projects the xy of each vertex, pins its depth to rsDepthRange.x
// and routes it to the viewport whose index is stored in vertex.z.

#extension GL_ARB_shader_viewport_layer_array : require

// HLSL-style aliases used by the declarations below.
#define float2 vec2
#define float3 vec3
#define float4 vec4

#define float4x4 mat4
#define mul( x, y ) ((x) * (y))

vulkan( layout( ogre_P0 ) uniform Params { )
    uniform float4x4 projectionMatrix;
    uniform float2 rsDepthRange;
vulkan( }; )

// xy: position to project; z: viewport index; w is not read.
vulkan_layout( OGRE_POSITION ) in vec4 vertex;

// NOTE(review): the other vertex shaders in this directory redeclare gl_PerVertex
// without a location qualifier - confirm that the qualifier here is intended.
vulkan_layout( location = 0 )
out gl_PerVertex
{
    vec4 gl_Position;
};

void main()
{
    vec4 projected = projectionMatrix * vec4( vertex.xy, 0.0f, 1.0f );

    // Only xy comes from the projection; depth and w are forced.
    gl_Position = vec4( projected.xy, rsDepthRange.x, 1.0f );
    gl_ViewportIndex = int( vertex.z );
}

+ 40
- 0
test/data/CommonMaterials/Common/GLSL/MipmapsGaussianBlur_cs.glsl View File

@@ -0,0 +1,40 @@
//Based on GPUOpen's samples SeparableFilter11
//https://github.com/GPUOpen-LibrariesAndSDKs/SeparableFilter11
//For better understanding, read "Efficient Compute Shader Programming" from Bill Bilodeau
//http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/Efficient%20Compute%20Shader%20Programming.pps

//TL;DR:
// * Each thread works on 4 pixels at a time (for VLIW hardware, i.e. Radeon HD 5000 & 6000 series).
// * 256 pixels per threadgroup. Each threadgroup works on 2 rows of 128 pixels each.
// That means 32x2 threads = 64. 64 threads x 4 pixels per thread = 256

//This file only defines pieces and properties for a blur base template (presumably
//GaussianBlurBase_cs, which inserts pieces with these names - confirm in the job definition).
//data_type and lds_data_type are pasted in the middle of a declaration, so they must stay
//free of comments and stray text.

//Working type and shared-memory storage type: three float channels.
@piece( data_type )vec3@end
@piece( lds_data_type )vec3@end
//Shared-memory sample cache: 2 rows of samples_per_threadgroup entries each.
@piece( lds_definition )
shared vec3 g_f3LDS[ 2 ] [ @value( samples_per_threadgroup ) ];
@end

//Extra uniform: the mip level of the input that gets sampled.
@piece( extra_params )
uniform float srcLodIdx;
@end

//Body of sampleTex(): reads xyz of the input at mip srcLodIdx and returns it.
@piece( image_sample )
return textureLod( vkSampler2D( inputImage, inputSampler ), f2SamplePosition, srcLodIdx ).xyz;
@end

//Overwrite these so that num_thread_groups gets correctly calculated by accounting LOD.
@pset( texture0_width, width_with_lod )
@pset( texture0_height, height_with_lod )

//Select the lower quality downscale path: 2 results per thread instead of 4.
@pset( downscale_lq, 1 )

//Tail of ComputeFilterKernel(): stores 2 texels per thread with w forced to 1.
//With downscale_lq (set just above) the 2 accumulated results are stored as they are.
//The other branch averages neighbouring pairs of the 4 full-resolution results instead.
@piece( image_store )
@property( downscale_lq )
@foreach( 2, iPixel )
imageStore( outputImage, ivec2( i2Center + @iPixel * i2Inc ), vec4( outColour[ @iPixel ], 1.0 ) );@end
@end @property( !downscale_lq )
@foreach( 2, iPixel )
imageStore( outputImage, ivec2( i2Center + @iPixel * i2Inc ),
vec4( (outColour[ @iPixel * 2 ] + outColour[ @iPixel * 2 + 1 ]) * 0.5, 1.0 ) );@end
@end
@end

+ 52
- 0
test/data/CommonMaterials/Common/GLSL/PccDepthCompressor_ps.glsl View File

@@ -0,0 +1,52 @@
#version ogre_glsl_ver_330

// GLSL front-end for PccDepthCompressor_ps.any. This file declares the resources and
// aliases; the actual logic is pulled in from the .any file (included twice below).

// HLSL-style aliases. Presumably they let the shared .any source be written once for
// several shading languages - confirm against the .any file.
#define float2 vec2
#define float3 vec3
#define float4 vec4

#define float3x3 mat3

#define mul( x, y ) ((x) * (y))
#define INLINE

// Texture sampling helper: on Vulkan the texture and sampler are separate objects and are
// combined at the call site; otherwise the sampler argument is ignored.
#ifdef VULKAN
#define OGRE_Sample( tex, sampler, uv ) texture( sampler2D( tex, sampler ), uv )
#else
#define OGRE_Sample( tex, sampler, uv ) texture( tex, uv )
#endif

vulkan_layout( ogre_t0 ) uniform texture2D depthTexture;
vulkan( layout( ogre_s0 ) uniform sampler pointSampler );

vulkan( layout( ogre_P0 ) uniform Params { )
uniform float2 projectionParams;
uniform float3 probeShapeHalfSize;
uniform float3 cameraPosLS;

uniform float3x3 viewSpaceToProbeLocalSpace;
vulkan( }; )

// p_ prefixed aliases for the uniforms above (presumably the names the .any file uses).
#define p_projectionParams projectionParams
#define p_probeShapeHalfSize probeShapeHalfSize
#define p_cameraPosLS cameraPosLS
#define p_viewSpaceToProbeLocalSpace viewSpaceToProbeLocalSpace

// Interpolated inputs for this fragment.
vulkan_layout( location = 0 )
in block
{
vec2 uv0;
vec3 cameraDir;
} inPs;

// First inclusion, with HEADER defined (presumably emits only file-scope declarations).
#define HEADER
#include "PccDepthCompressor_ps.any"
#undef HEADER

layout( location = 0 ) out float4 fragColour;

void main()
{
// Second inclusion, without HEADER: the shader body. It is expected to leave a value
// named alpha in scope for the statement below.
#include "PccDepthCompressor_ps.any"
//RGB writes should be masked off
fragColour = float4( 0, 0, 0, alpha );
}

+ 28
- 0
test/data/CommonMaterials/Common/GLSL/QuadCameraDirNoUV_vs.glsl View File

@@ -0,0 +1,28 @@
#version ogre_glsl_ver_330

// Vertex shader: projects the xy of each vertex, pins its depth to rsDepthRange.y and
// forwards the vertex normal to the pixel shader as cameraDir.

vulkan_layout( OGRE_POSITION ) in vec2 vertex;
vulkan_layout( OGRE_NORMAL ) in vec3 normal;

vulkan( layout( ogre_P0 ) uniform Params { )
    uniform vec2 rsDepthRange;
    uniform mat4 worldViewProj;
vulkan( }; )

out gl_PerVertex
{
    vec4 gl_Position;
};

vulkan_layout( location = 0 )
out block
{
    vec3 cameraDir;
} outVs;

void main()
{
    vec4 projected = worldViewProj * vec4( vertex.xy, 0, 1.0f );

    // Only xy comes from the projection; depth and w are forced.
    gl_Position = vec4( projected.xy, rsDepthRange.y, 1.0f );
    outVs.cameraDir = normal;
}

+ 28
- 0
test/data/CommonMaterials/Common/GLSL/QuadCameraDir_vs.glsl View File

@@ -0,0 +1,28 @@
#version ogre_glsl_ver_330

// Vertex shader: transforms the vertex by worldViewProj and forwards the texture
// coordinates and the vertex normal (as cameraDir) to the pixel shader.

vulkan_layout( OGRE_POSITION ) in vec3 vertex;
vulkan_layout( OGRE_NORMAL ) in vec3 normal;
vulkan_layout( OGRE_TEXCOORD0 ) in vec2 uv0;

vulkan( layout( ogre_P0 ) uniform Params { )
    uniform mat4 worldViewProj;
vulkan( }; )

out gl_PerVertex
{
    vec4 gl_Position;
};

vulkan_layout( location = 0 )
out block
{
    vec2 uv0;
    vec3 cameraDir;
} outVs;

void main()
{
    vec4 localPosition = vec4( vertex, 1.0 );
    gl_Position = worldViewProj * localPosition;

    // Straight pass-through of the per-vertex attributes.
    outVs.uv0 = uv0;
    outVs.cameraDir = normal;
}

+ 25
- 0
test/data/CommonMaterials/Common/GLSL/Quad_vs.glsl View File

@@ -0,0 +1,25 @@
#version ogre_glsl_ver_330

// Vertex shader: transforms the vertex by worldViewProj and forwards the texture
// coordinates to the pixel shader.

vulkan_layout( OGRE_POSITION ) in vec3 vertex;
vulkan_layout( OGRE_TEXCOORD0 ) in vec2 uv0;

vulkan( layout( ogre_P0 ) uniform Params { )
    uniform mat4 worldViewProj;
vulkan( }; )

out gl_PerVertex
{
    vec4 gl_Position;
};

vulkan_layout( location = 0 )
out block
{
    vec2 uv0;
} outVs;

void main()
{
    vec4 localPosition = vec4( vertex, 1.0 );
    gl_Position = worldViewProj * localPosition;

    // Straight pass-through of the texture coordinates.
    outVs.uv0 = uv0;
}

+ 42
- 0
test/data/CommonMaterials/Common/GLSL/RadialDensityMask_ps.glsl View File

@@ -0,0 +1,42 @@
#version ogre_glsl_ver_330

// Fragment shader interface for the radial density mask.
// NOTE(review): vulkan_layout(...) and vulkan(...) are not defined in this file;
// presumably engine-supplied macros that only emit their arguments when
// compiling for Vulkan — confirm.

// HLSL-style type aliases used by the declarations and main() below.
#define float2 vec2
#define float4 vec4

// Colour output; main() always writes zero to it for fragments it keeps.
vulkan_layout( location = 0 )
out vec4 fragColour;
// Explicit redeclaration of the built-in window-space fragment coordinate.
in vec4 gl_FragCoord;

// Shader parameters, packed into vectors and unpacked by the p_* macros below.
vulkan( layout( ogre_P0 ) uniform Params { )
// x: gl_FragCoord.x value from which the right eye's centre is used;
// yzw: the three distance thresholds compared against in main().
uniform float4 rightEyeStart_radius;
// xy: left eye centre, zw: right eye centre.
uniform float4 leftEyeCenter_rightEyeCenter;
// Scale applied to the 8x8 block index before it is compared with the eye centre.
uniform float2 invBlockResolution;
vulkan( }; )

// Named accessors for the packed parameters above.
#define p_leftEyeCenter leftEyeCenter_rightEyeCenter.xy
#define p_rightEyeCenter leftEyeCenter_rightEyeCenter.zw
#define p_rightEyeStart rightEyeStart_radius.x
#define p_radius rightEyeStart_radius.yzw
#define p_invBlockResolution invBlockResolution

// Discards fragments according to their distance from the eye centre and their
// position in a 2x2-pixel grid; every surviving fragment outputs zero.
void main()
{
	// Pick the centre of whichever eye this fragment falls in.
	float2 eyeCenter;
	if( gl_FragCoord.x >= p_rightEyeStart )
		eyeCenter = p_rightEyeCenter;
	else
		eyeCenter = p_leftEyeCenter;

	//We must work in blocks so the reconstruction filter can work properly
	float2 blockIdx = trunc( gl_FragCoord.xy * 0.125f );
	float2 toCenter = blockIdx * p_invBlockResolution.xy - eyeCenter;
	toCenter.x *= 2.0f; //Twice because of stereo (each eye is half the size of the full res)
	float distToCenter = length( toCenter );

	// Coordinates in units of 2x2 pixel groups, plus the bit patterns tested below.
	uvec2 halfCoord = uvec2( gl_FragCoord.xy * 0.5f );
	uint xBit0 = halfCoord.x & 0x01u;
	uint yBit0 = halfCoord.y & 0x01u;
	uint xBits01 = halfCoord.x & 0x03u;
	uint yBits01 = halfCoord.y & 0x03u;

	// Each predicate mirrors one branch of a discard cascade, from the
	// innermost radius outwards; the last one has no distance limit.
	bool withinInner = distToCenter < p_radius.x;
	bool checkerboard = xBit0 == yBit0 && distToCenter < p_radius.y;
	bool everySecond = ( xBit0 == 0u && yBit0 == 0u ) && distToCenter < p_radius.z;
	bool everyFourth = xBits01 == 0u && yBits01 == 0u;

	if( withinInner || checkerboard || everySecond || everyFourth )
		discard;

	fragColour = float4( 0.0, 0.0, 0.0, 0.0 );
}

+ 28
- 0
test/data/CommonMaterials/Common/GLSL/RadialDensityMask_vs.glsl View File

@@ -0,0 +1,28 @@
#version ogre_glsl_ver_330

// Vertex shader interface for the radial density mask.
// NOTE(review): vulkan_layout(...) and vulkan(...) are not defined in this file;
// presumably engine-supplied macros that only emit their arguments when
// compiling for Vulkan — confirm.

// Required because main() writes gl_ViewportIndex from the vertex stage.
#extension GL_ARB_shader_viewport_layer_array : require

// When VULKAN is defined, alias gl_VertexID to gl_VertexIndex so main() can
// use a single name.
#ifdef VULKAN
#define gl_VertexID gl_VertexIndex
#endif

// 2D position of the mask vertex; main() extends it with z = 0, w = 1.
vulkan_layout( OGRE_POSITION ) in vec2 vertex;

// Shader parameters.
vulkan( layout( ogre_P0 ) uniform Params { )
// Subtracted from gl_VertexID in main() before choosing the viewport.
uniform float ogreBaseVertex;
// Only the .x component is read by main(), where it becomes the output depth.
uniform vec2 rsDepthRange;
// Transform applied to the vertex position.
uniform mat4 worldViewProj;
vulkan( }; )

// Redeclaration of the built-in output block with the one member written here.
out gl_PerVertex
{
vec4 gl_Position;
};

// Places the mask vertex in clip space at a fixed depth and routes it to
// viewport 0 or 1 depending on its index within the draw.
void main()
{
	// Only XY of the transformed position is kept; depth and w are forced.
	vec4 clipPos = worldViewProj * vec4( vertex, 0.0, 1.0 );

	// NOTE(review): rsDepthRange.x is presumably the near end of the depth range — confirm.
	gl_Position = vec4( clipPos.xy, rsDepthRange.x, 1.0 );

	// Vertex index relative to the start of this draw (int promoted to float).
	float localVertex = gl_VertexID - ogreBaseVertex;

	// The first 3 * 4 vertices go to viewport 0, everything after to viewport 1.
	if( localVertex >= float( 3 * 4 ) )
		gl_ViewportIndex = 1;
	else
		gl_ViewportIndex = 0;
}

+ 19
- 0
test/data/CommonMaterials/Common/GLSL/Resolve_1xFP32_Subsample0_ps.glsl View File

@@ -0,0 +1,19 @@
#version ogre_glsl_ver_330
vulkan_layout( ogre_t0 ) uniform texture2DMS tex;