Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make the BC3 shaders compatible with GLSL ES version 310 #12

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
56 changes: 33 additions & 23 deletions bin/Data/bc1.glsl
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
#version 430 core
#version 310 es

#if defined(GL_ES) && GL_ES == 1
// Desktop GLSL allows the const keyword for either compile-time or
// run-time constants. GLSL ES only allows the keyword for compile-time
// constants. Since we use const on run-time constants, define it to
// nothing.
#define const
#endif

// #include "/media/matias/Datos/SyntaxHighlightingMisc.h"

Expand All @@ -11,7 +19,7 @@ layout( location = 0 ) uniform uint p_numRefinements;

uniform sampler2D srcTex;

layout( rg32ui ) uniform restrict writeonly uimage2D dstTexture;
layout( rgba16ui ) uniform restrict writeonly mediump uimage2D dstTexture;

layout( std430, binding = 1 ) readonly restrict buffer globalBuffer
{
Expand Down Expand Up @@ -109,7 +117,7 @@ void OptimizeColorsBlock( const uint srcPixelsBlock[16], out float outMinEndp16,
// determine covariance matrix
float cov[6];
for( int i = 0; i < 6; ++i )
cov[i] = 0;
cov[i] = 0.0f;

for( int i = 0; i < 16; ++i )
{
Expand Down Expand Up @@ -235,43 +243,43 @@ uint MatchColorsBlock( const uint srcPixelsBlock[16], float3 colour[4] )
float3 currColour;
float dotValue;

currColour = unpackUnorm4x8( srcPixelsBlock[y * 4 + 0] ).xyz * 255.0f;
currColour = unpackUnorm4x8( srcPixelsBlock[y * 4u + 0u] ).xyz * 255.0f;
dotValue = dot( currColour, dir );

ditherDot = ( dotValue * 16.0f ) + ( 3 * ep2[1] + 5 * ep2[0] );
ditherDot = ( dotValue * 16.0f ) + ( 3.0f * ep2[1] + 5.0f * ep2[0] );
if( ditherDot < halfPoint )
step = ( ditherDot < c0Point ) ? 1u : 3u;
else
step = ( ditherDot < c3Point ) ? 2u : 0u;
ep1[0] = dotValue - stops[step];
lmask = step;

currColour = unpackUnorm4x8( srcPixelsBlock[y * 4 + 1] ).xyz * 255.0f;
currColour = unpackUnorm4x8( srcPixelsBlock[y * 4u + 1u] ).xyz * 255.0f;
dotValue = dot( currColour, dir );

ditherDot = ( dotValue * 16.0f ) + ( 7 * ep1[0] + 3 * ep2[2] + 5 * ep2[1] + ep2[0] );
ditherDot = ( dotValue * 16.0f ) + ( 7.0f * ep1[0] + 3.0f * ep2[2] + 5.0f * ep2[1] + ep2[0] );
if( ditherDot < halfPoint )
step = ( ditherDot < c0Point ) ? 1u : 3u;
else
step = ( ditherDot < c3Point ) ? 2u : 0u;
ep1[1] = dotValue - stops[step];
lmask |= step << 2u;

currColour = unpackUnorm4x8( srcPixelsBlock[y * 4 + 2] ).xyz * 255.0f;
currColour = unpackUnorm4x8( srcPixelsBlock[y * 4u + 2u] ).xyz * 255.0f;
dotValue = dot( currColour, dir );

ditherDot = ( dotValue * 16.0f ) + ( 7 * ep1[1] + 3 * ep2[3] + 5 * ep2[2] + ep2[1] );
ditherDot = ( dotValue * 16.0f ) + ( 7.0f * ep1[1] + 3.0f * ep2[3] + 5.0f * ep2[2] + ep2[1] );
if( ditherDot < halfPoint )
step = ( ditherDot < c0Point ) ? 1u : 3u;
else
step = ( ditherDot < c3Point ) ? 2u : 0u;
ep1[2] = dotValue - stops[step];
lmask |= step << 4u;

currColour = unpackUnorm4x8( srcPixelsBlock[y * 4 + 2] ).xyz * 255.0f;
currColour = unpackUnorm4x8( srcPixelsBlock[y * 4u + 2u] ).xyz * 255.0f;
dotValue = dot( currColour, dir );

ditherDot = ( dotValue * 16.0f ) + ( 7 * ep1[2] + 5 * ep2[3] + ep2[2] );
ditherDot = ( dotValue * 16.0f ) + ( 7.0f * ep1[2] + 5.0f * ep2[3] + ep2[2] );
if( ditherDot < halfPoint )
step = ( ditherDot < c0Point ) ? 1u : 3u;
else
Expand Down Expand Up @@ -320,8 +328,8 @@ bool RefineBlock( const uint srcPixelsBlock[16], uint mask, inout float inOutMin
}
else
{
const float w1Tab[4] = { 3, 0, 2, 1 };
const float prods[4] = { 589824.0f, 2304.0f, 262402.0f, 66562.0f };
const float w1Tab[4] = float[4]( 3.0f, 0.0f, 2.0f, 1.0f );
const float prods[4] = float[4]( 589824.0f, 2304.0f, 262402.0f, 66562.0f );
// ^some magic to save a lot of multiplies in the accumulating loop...
// (precomputed products of weights for least squares system, accumulated inside one 32-bit
// register)
Expand Down Expand Up @@ -384,32 +392,32 @@ float3 quant( float3 srcValue )

void DitherBlock( const uint srcPixBlck[16], out uint dthPixBlck[16] )
{
float3 ep1[4] = { float3( 0, 0, 0 ), float3( 0, 0, 0 ), float3( 0, 0, 0 ), float3( 0, 0, 0 ) };
float3 ep2[4] = { float3( 0, 0, 0 ), float3( 0, 0, 0 ), float3( 0, 0, 0 ), float3( 0, 0, 0 ) };
float3 ep1[4] = float3[4]( float3( 0, 0, 0 ), float3( 0, 0, 0 ), float3( 0, 0, 0 ), float3( 0, 0, 0 ) );
float3 ep2[4] = float3[4]( float3( 0, 0, 0 ), float3( 0, 0, 0 ), float3( 0, 0, 0 ), float3( 0, 0, 0 ) );

for( uint y = 0u; y < 16u; y += 4u )
{
float3 srcPixel, dithPixel;

srcPixel = unpackUnorm4x8( srcPixBlck[y + 0u] ).xyz * 255.0f;
dithPixel = quant( srcPixel + trunc( ( 3 * ep2[1] + 5 * ep2[0] ) * ( 1.0f / 16.0f ) ) );
dithPixel = quant( srcPixel + trunc( ( 3.0f * ep2[1] + 5.0f * ep2[0] ) * ( 1.0f / 16.0f ) ) );
ep1[0] = srcPixel - dithPixel;
dthPixBlck[y + 0u] = packUnorm4x8( float4( dithPixel * ( 1.0f / 255.0f ), 1.0f ) );

srcPixel = unpackUnorm4x8( srcPixBlck[y + 1u] ).xyz * 255.0f;
dithPixel = quant(
srcPixel + trunc( ( 7 * ep1[0] + 3 * ep2[2] + 5 * ep2[1] + ep2[0] ) * ( 1.0f / 16.0f ) ) );
srcPixel + trunc( ( 7.0f * ep1[0] + 3.0f * ep2[2] + 5.0f * ep2[1] + ep2[0] ) * ( 1.0f / 16.0f ) ) );
ep1[1] = srcPixel - dithPixel;
dthPixBlck[y + 1u] = packUnorm4x8( float4( dithPixel * ( 1.0f / 255.0f ), 1.0f ) );

srcPixel = unpackUnorm4x8( srcPixBlck[y + 2u] ).xyz * 255.0f;
dithPixel = quant(
srcPixel + trunc( ( 7 * ep1[1] + 3 * ep2[3] + 5 * ep2[2] + ep2[1] ) * ( 1.0f / 16.0f ) ) );
srcPixel + trunc( ( 7.0f * ep1[1] + 3.0f * ep2[3] + 5.0f * ep2[2] + ep2[1] ) * ( 1.0f / 16.0f ) ) );
ep1[2] = srcPixel - dithPixel;
dthPixBlck[y + 2u] = packUnorm4x8( float4( dithPixel * ( 1.0f / 255.0f ), 1.0f ) );

srcPixel = unpackUnorm4x8( srcPixBlck[y + 3u] ).xyz * 255.0f;
dithPixel = quant( srcPixel + trunc( ( 7 * ep1[2] + 5 * ep2[3] + ep2[2] ) * ( 1.0f / 16.0f ) ) );
dithPixel = quant( srcPixel + trunc( ( 7.0f * ep1[2] + 5.0f * ep2[3] + ep2[2] ) * ( 1.0f / 16.0f ) ) );
ep1[3] = srcPixel - dithPixel;
dthPixBlck[y + 3u] = packUnorm4x8( float4( dithPixel * ( 1.0f / 255.0f ), 1.0f ) );

Expand Down Expand Up @@ -505,10 +513,12 @@ void main()
mask ^= 0x55555555u;
}

uint2 outputBytes;
outputBytes.x = uint( maxEndp16 ) | ( uint( minEndp16 ) << 16u );
outputBytes.y = mask;
uint4 outputBytes;
outputBytes.x = uint( maxEndp16 );
outputBytes.y = uint( minEndp16 );
outputBytes.z = mask & 0xFFFFu;
outputBytes.w = mask >> 16u;

uint2 dstUV = gl_GlobalInvocationID.xy;
imageStore( dstTexture, int2( dstUV ), uint4( outputBytes.xy, 0u, 0u ) );
imageStore( dstTexture, int2( dstUV ), outputBytes );
}
2 changes: 1 addition & 1 deletion bin/Data/bc1_dither.glsl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#version 430 core
#version 310 es

#define BC1_DITHER
#include "bc1.glsl"
31 changes: 20 additions & 11 deletions bin/Data/bc4.glsl
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
#version 430 core
#version 310 es

#if defined(GL_ES) && GL_ES == 1
// Desktop GLSL allows the const keyword for either compile-time or
// run-time constants. GLSL ES only allows the keyword for compile-time
// constants. Since we use const on run-time constants, define it to
// nothing.
#define const
#endif

// #include "/media/matias/Datos/SyntaxHighlightingMisc.h"

Expand All @@ -8,14 +16,14 @@
shared float2 g_minMaxValues[4u * 4u * 4u];
shared uint2 g_mask[4u * 4u];

layout( location = 0 ) uniform float2 params;
layout( location = 0 ) uniform uint2 params;

#define p_channelIdx params.x
#define p_useSNorm params.y

uniform sampler2D srcTex;

layout( rg32ui ) uniform restrict writeonly uimage2D dstTexture;
layout( rgba16ui ) uniform restrict writeonly mediump uimage2D dstTexture;

layout( local_size_x = 4, //
local_size_y = 4, //
Expand Down Expand Up @@ -47,7 +55,7 @@ void main()
const uint2 pixelsToLoad = pixelsToLoadBase + uint2( i, blockThreadId );

const float4 value = OGRE_Load2D( srcTex, int2( pixelsToLoad ), 0 ).xyzw;
srcPixel[i] = p_channelIdx == 0 ? value.x : ( p_channelIdx == 1 ? value.y : value.w );
srcPixel[i] = p_channelIdx == 0u ? value.x : ( p_channelIdx == 1u ? value.y : value.w );
srcPixel[i] *= 255.0f;
}

Expand Down Expand Up @@ -77,8 +85,8 @@ void main()
float dist = maxVal - minVal;
float dist4 = dist * 4.0f;
float dist2 = dist * 2.0f;
float bias = ( dist < 8 ) ? ( dist - 1 ) : ( trunc( dist * 0.5f ) + 2 );
bias -= minVal * 7;
float bias = ( dist < 8.0f ) ? ( dist - 1.0f ) : ( trunc( dist * 0.5f ) + 2.0f );
bias -= minVal * 7.0f;

uint mask0 = 0u, mask1 = 0u;

Expand Down Expand Up @@ -134,9 +142,9 @@ void main()
if( blockThreadId == 0u )
{
// Save data
uint2 outputBytes;
uint4 outputBytes;

if( p_useSNorm != 0.0f )
if( p_useSNorm != 0u )
{
outputBytes.x =
packSnorm4x8( float4( maxVal * ( 1.0f / 255.0f ) * 2.0f - 1.0f,
Expand All @@ -147,10 +155,11 @@ void main()
outputBytes.x = packUnorm4x8(
float4( maxVal * ( 1.0f / 255.0f ), minVal * ( 1.0f / 255.0f ), 0.0f, 0.0f ) );
}
outputBytes.x |= g_mask[maskIdxBase].x;
outputBytes.y = g_mask[maskIdxBase].y;
outputBytes.y = g_mask[maskIdxBase].x >> 16u;
outputBytes.z = g_mask[maskIdxBase].y & 0xFFFFu;
outputBytes.w = g_mask[maskIdxBase].y >> 16u;

uint2 dstUV = gl_GlobalInvocationID.yz;
imageStore( dstTexture, int2( dstUV ), uint4( outputBytes.xy, 0u, 0u ) );
imageStore( dstTexture, int2( dstUV ), outputBytes );
}
}
8 changes: 4 additions & 4 deletions bin/Data/etc2_rgba_stitch.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// This compute shader merely stitches them together to form the final result
// It's also used by RG11 driver to stitch two R11 into one RG11

#version 430 core
#version 310 es

// #include "/media/matias/Datos/SyntaxHighlightingMisc.h"

Expand All @@ -13,9 +13,9 @@ layout( local_size_x = 8, //
local_size_y = 8, //
local_size_z = 1 ) in;

layout( binding = 0 ) uniform usampler2D srcRGB;
layout( binding = 1 ) uniform usampler2D srcAlpha;
layout( rgba32ui ) uniform restrict writeonly uimage2D dstTexture;
layout( binding = 0 ) uniform highp usampler2D srcRGB;
layout( binding = 1 ) uniform highp usampler2D srcAlpha;
layout( rgba32ui ) uniform restrict writeonly highp uimage2D dstTexture;

void main()
{
Expand Down
1 change: 1 addition & 0 deletions include/betsy/EncoderGL.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ namespace betsy
{
PFG_RGBA32_UINT,
PFG_RGBA32_FLOAT,
PFG_RGBA16_UINT,
PFG_RGBA16_FLOAT,
PFG_R32_FLOAT,
PFG_RG32_UINT,
Expand Down
8 changes: 8 additions & 0 deletions src/PlatformGL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,20 @@ namespace betsy

g_glContext = SDL_GL_CreateContext( g_sdlWindow );

const SDL_bool has_required_extensions = SDL_GL_ExtensionSupported( "GL_ARB_ES3_1_compatibility" );

if( !g_glContext )
{
fprintf( stderr, "GL Context creation failed.\n" );
SDL_Quit();
abort();
}
else if ( has_required_extensions == SDL_FALSE )
{
fprintf( stderr, "GL Context lacks required extensions.\n" );
SDL_Quit();
abort();
}
else
{
printf( "GL Context creation suceeded.\n" );
Expand Down
1 change: 1 addition & 0 deletions src/betsy/CpuImage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ namespace betsy
case PFG_RGBA32_UINT:
case PFG_RGBA32_FLOAT:
return 4u * 4u;
case PFG_RGBA16_UINT:
case PFG_RGBA16_FLOAT:
return 2u * 4u;
case PFG_R32_FLOAT:
Expand Down
6 changes: 3 additions & 3 deletions src/betsy/EncoderBC1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ namespace betsy
{
bindTexture( 0u, m_srcTexture );
bindComputePso( m_bc1Pso );
bindUav( 0u, m_bc1TargetRes, PFG_RG32_UINT, ResourceAccess::Write );
bindUav( 0u, m_bc1TargetRes, PFG_RGBA16_UINT, ResourceAccess::Write );
bindUavBuffer( 1u, m_bc1TablesSsbo, 0u, sizeof( Bc1Tables ) );

glUniform1ui( 0, 2u );
Expand All @@ -128,10 +128,10 @@ namespace betsy
{
// Compress Alpha too (using BC4)
bindComputePso( m_bc4Pso );
bindUav( 0u, m_bc4TargetRes, PFG_RG32_UINT, ResourceAccess::Write );
bindUav( 0u, m_bc4TargetRes, PFG_RGBA16_UINT, ResourceAccess::Write );

// p_channelIdx, p_useSNorm
glUniform2f( 0, 3.0f, 0.0f );
glUniform2ui( 0, 3u, 0u );

glDispatchCompute( 1u, //
alignToNextMultiple( m_width, 16u ) / 16u,
Expand Down
4 changes: 2 additions & 2 deletions src/betsy/EncoderBC4.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,10 @@ namespace betsy
const size_t numChannels = m_bc4TargetRes[1] ? 2u : 1u;
for( size_t i = 0u; i < numChannels; ++i )
{
bindUav( 0u, m_bc4TargetRes[i], PFG_RG32_UINT, ResourceAccess::Write );
bindUav( 0u, m_bc4TargetRes[i], PFG_RGBA16_UINT, ResourceAccess::Write );

// p_channelIdx, p_useSNorm
glUniform2f( 0, i == 0u ? 0.0f : 1.0f, m_encodeSNorm ? 1.0f : 0.0f );
glUniform2ui( 0, i, m_encodeSNorm ? 1u : 0u );

glDispatchCompute( 1u, //
alignToNextMultiple( m_width, 16u ) / 16u,
Expand Down
7 changes: 7 additions & 0 deletions src/betsy/EncoderGL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ namespace betsy
return GL_RGBA32UI;
case PFG_RGBA32_FLOAT:
return GL_RGBA32F;
case PFG_RGBA16_UINT:
return GL_RGBA16UI;
case PFG_RGBA16_FLOAT:
return GL_RGBA16F;
case PFG_R32_FLOAT:
Expand Down Expand Up @@ -104,6 +106,7 @@ namespace betsy
{
case PFG_RGBA32_UINT:
case PFG_RGBA32_FLOAT:
case PFG_RGBA16_UINT:
case PFG_RGBA16_FLOAT:
case PFG_RGBA8_UNORM:
case PFG_RGBA8_UNORM_SRGB:
Expand Down Expand Up @@ -134,6 +137,7 @@ namespace betsy
switch( pixelFormat )
{
case PFG_RGBA32_UINT:
case PFG_RGBA16_UINT:
format = GL_RGBA_INTEGER;
break;
case PFG_R32_FLOAT:
Expand Down Expand Up @@ -180,6 +184,9 @@ namespace betsy
case PFG_RG32_UINT:
type = GL_UNSIGNED_INT;
break;
case PFG_RGBA16_UINT:
type = GL_UNSIGNED_SHORT;
break;
case PFG_RGBA8_UNORM:
case PFG_RGBA8_UNORM_SRGB:
type = GL_UNSIGNED_INT_8_8_8_8_REV;
Expand Down