darksylinc · nanley · Aug 9, 2022 · Nov 9, 2022 · Aug 9, 2022 · Aug 29, 2022
diff --git a/bin/Data/bc1.glsl b/bin/Data/bc1.glsl
@@ -1,4 +1,12 @@
-#version 430 core
+#version 310 es
+
+#if defined(GL_ES) && GL_ES == 1
+	// Desktop GLSL allows the const keyword for either compile-time or
+	// run-time constants. GLSL ES only allows the keyword for compile-time
+	// constants. Since we use const on run-time constants, define it to
+	// nothing.
+	#define const
+#endif
 
 // #include "/media/matias/Datos/SyntaxHighlightingMisc.h"
 
@@ -11,7 +19,7 @@ layout( location = 0 ) uniform uint p_numRefinements;
 
 uniform sampler2D srcTex;
 
-layout( rg32ui ) uniform restrict writeonly uimage2D dstTexture;
+layout( rgba16ui ) uniform restrict writeonly mediump uimage2D dstTexture;
 
 layout( std430, binding = 1 ) readonly restrict buffer globalBuffer
 {
@@ -109,7 +117,7 @@ void OptimizeColorsBlock( const uint srcPixelsBlock[16], out float outMinEndp16,
 	// determine covariance matrix
 	float cov[6];
 	for( int i = 0; i < 6; ++i )
-		cov[i] = 0;
+		cov[i] = 0.0f;
 
 	for( int i = 0; i < 16; ++i )
 	{
@@ -235,43 +243,43 @@ uint MatchColorsBlock( const uint srcPixelsBlock[16], float3 colour[4] )
 		float3 currColour;
 		float dotValue;
 
-		currColour = unpackUnorm4x8( srcPixelsBlock[y * 4 + 0] ).xyz * 255.0f;
+		currColour = unpackUnorm4x8( srcPixelsBlock[y * 4u + 0u] ).xyz * 255.0f;
 		dotValue = dot( currColour, dir );
 
-		ditherDot = ( dotValue * 16.0f ) + ( 3 * ep2[1] + 5 * ep2[0] );
+		ditherDot = ( dotValue * 16.0f ) + ( 3.0f * ep2[1] + 5.0f * ep2[0] );
 		if( ditherDot < halfPoint )
 			step = ( ditherDot < c0Point ) ? 1u : 3u;
 		else
 			step = ( ditherDot < c3Point ) ? 2u : 0u;
 		ep1[0] = dotValue - stops[step];
 		lmask = step;
 
-		currColour = unpackUnorm4x8( srcPixelsBlock[y * 4 + 1] ).xyz * 255.0f;
+		currColour = unpackUnorm4x8( srcPixelsBlock[y * 4u + 1u] ).xyz * 255.0f;
 		dotValue = dot( currColour, dir );
 
-		ditherDot = ( dotValue * 16.0f ) + ( 7 * ep1[0] + 3 * ep2[2] + 5 * ep2[1] + ep2[0] );
+		ditherDot = ( dotValue * 16.0f ) + ( 7.0f * ep1[0] + 3.0f * ep2[2] + 5.0f * ep2[1] + ep2[0] );
 		if( ditherDot < halfPoint )
 			step = ( ditherDot < c0Point ) ? 1u : 3u;
 		else
 			step = ( ditherDot < c3Point ) ? 2u : 0u;
 		ep1[1] = dotValue - stops[step];
 		lmask |= step << 2u;
 
-		currColour = unpackUnorm4x8( srcPixelsBlock[y * 4 + 2] ).xyz * 255.0f;
+		currColour = unpackUnorm4x8( srcPixelsBlock[y * 4u + 2u] ).xyz * 255.0f;
 		dotValue = dot( currColour, dir );
 
-		ditherDot = ( dotValue * 16.0f ) + ( 7 * ep1[1] + 3 * ep2[3] + 5 * ep2[2] + ep2[1] );
+		ditherDot = ( dotValue * 16.0f ) + ( 7.0f * ep1[1] + 3.0f * ep2[3] + 5.0f * ep2[2] + ep2[1] );
 		if( ditherDot < halfPoint )
 			step = ( ditherDot < c0Point ) ? 1u : 3u;
 		else
 			step = ( ditherDot < c3Point ) ? 2u : 0u;
 		ep1[2] = dotValue - stops[step];
 		lmask |= step << 4u;
 
-		currColour = unpackUnorm4x8( srcPixelsBlock[y * 4 + 2] ).xyz * 255.0f;
+		currColour = unpackUnorm4x8( srcPixelsBlock[y * 4u + 3u] ).xyz * 255.0f;  // 4th pixel of the row (was re-reading pixel 2)
 		dotValue = dot( currColour, dir );
 
-		ditherDot = ( dotValue * 16.0f ) + ( 7 * ep1[2] + 5 * ep2[3] + ep2[2] );
+		ditherDot = ( dotValue * 16.0f ) + ( 7.0f * ep1[2] + 5.0f * ep2[3] + ep2[2] );
 		if( ditherDot < halfPoint )
 			step = ( ditherDot < c0Point ) ? 1u : 3u;
 		else
@@ -320,8 +328,8 @@ bool RefineBlock( const uint srcPixelsBlock[16], uint mask, inout float inOutMin
 	}
 	else
 	{
-		const float w1Tab[4] = { 3, 0, 2, 1 };
-		const float prods[4] = { 589824.0f, 2304.0f, 262402.0f, 66562.0f };
+		const float w1Tab[4] = float[4]( 3.0f, 0.0f, 2.0f, 1.0f );
+		const float prods[4] = float[4]( 589824.0f, 2304.0f, 262402.0f, 66562.0f );
 		// ^some magic to save a lot of multiplies in the accumulating loop...
 		// (precomputed products of weights for least squares system, accumulated inside one 32-bit
 		// register)
@@ -384,32 +392,32 @@ float3 quant( float3 srcValue )
 
 void DitherBlock( const uint srcPixBlck[16], out uint dthPixBlck[16] )
 {
-	float3 ep1[4] = { float3( 0, 0, 0 ), float3( 0, 0, 0 ), float3( 0, 0, 0 ), float3( 0, 0, 0 ) };
-	float3 ep2[4] = { float3( 0, 0, 0 ), float3( 0, 0, 0 ), float3( 0, 0, 0 ), float3( 0, 0, 0 ) };
+	float3 ep1[4] = float3[4]( float3( 0, 0, 0 ), float3( 0, 0, 0 ), float3( 0, 0, 0 ), float3( 0, 0, 0 ) );
+	float3 ep2[4] = float3[4]( float3( 0, 0, 0 ), float3( 0, 0, 0 ), float3( 0, 0, 0 ), float3( 0, 0, 0 ) );
 
 	for( uint y = 0u; y < 16u; y += 4u )
 	{
 		float3 srcPixel, dithPixel;
 
 		srcPixel = unpackUnorm4x8( srcPixBlck[y + 0u] ).xyz * 255.0f;
-		dithPixel = quant( srcPixel + trunc( ( 3 * ep2[1] + 5 * ep2[0] ) * ( 1.0f / 16.0f ) ) );
+		dithPixel = quant( srcPixel + trunc( ( 3.0f * ep2[1] + 5.0f * ep2[0] ) * ( 1.0f / 16.0f ) ) );
 		ep1[0] = srcPixel - dithPixel;
 		dthPixBlck[y + 0u] = packUnorm4x8( float4( dithPixel * ( 1.0f / 255.0f ), 1.0f ) );
 
 		srcPixel = unpackUnorm4x8( srcPixBlck[y + 1u] ).xyz * 255.0f;
 		dithPixel = quant(
-			srcPixel + trunc( ( 7 * ep1[0] + 3 * ep2[2] + 5 * ep2[1] + ep2[0] ) * ( 1.0f / 16.0f ) ) );
+			srcPixel + trunc( ( 7.0f * ep1[0] + 3.0f * ep2[2] + 5.0f * ep2[1] + ep2[0] ) * ( 1.0f / 16.0f ) ) );
 		ep1[1] = srcPixel - dithPixel;
 		dthPixBlck[y + 1u] = packUnorm4x8( float4( dithPixel * ( 1.0f / 255.0f ), 1.0f ) );
 
 		srcPixel = unpackUnorm4x8( srcPixBlck[y + 2u] ).xyz * 255.0f;
 		dithPixel = quant(
-			srcPixel + trunc( ( 7 * ep1[1] + 3 * ep2[3] + 5 * ep2[2] + ep2[1] ) * ( 1.0f / 16.0f ) ) );
+			srcPixel + trunc( ( 7.0f * ep1[1] + 3.0f * ep2[3] + 5.0f * ep2[2] + ep2[1] ) * ( 1.0f / 16.0f ) ) );
 		ep1[2] = srcPixel - dithPixel;
 		dthPixBlck[y + 2u] = packUnorm4x8( float4( dithPixel * ( 1.0f / 255.0f ), 1.0f ) );
 
 		srcPixel = unpackUnorm4x8( srcPixBlck[y + 3u] ).xyz * 255.0f;
-		dithPixel = quant( srcPixel + trunc( ( 7 * ep1[2] + 5 * ep2[3] + ep2[2] ) * ( 1.0f / 16.0f ) ) );
+		dithPixel = quant( srcPixel + trunc( ( 7.0f * ep1[2] + 5.0f * ep2[3] + ep2[2] ) * ( 1.0f / 16.0f ) ) );
 		ep1[3] = srcPixel - dithPixel;
 		dthPixBlck[y + 3u] = packUnorm4x8( float4( dithPixel * ( 1.0f / 255.0f ), 1.0f ) );
 
@@ -505,10 +513,12 @@ void main()
 		mask ^= 0x55555555u;
 	}
 
-	uint2 outputBytes;
-	outputBytes.x = uint( maxEndp16 ) | ( uint( minEndp16 ) << 16u );
-	outputBytes.y = mask;
+	uint4 outputBytes;
+	outputBytes.x = uint( maxEndp16 );
+	outputBytes.y = uint( minEndp16 );
+	outputBytes.z = mask & 0xFFFFu;
+	outputBytes.w = mask >> 16u;
 
 	uint2 dstUV = gl_GlobalInvocationID.xy;
-	imageStore( dstTexture, int2( dstUV ), uint4( outputBytes.xy, 0u, 0u ) );
+	imageStore( dstTexture, int2( dstUV ), outputBytes );
 }
diff --git a/bin/Data/bc1_dither.glsl b/bin/Data/bc1_dither.glsl
@@ -1,4 +1,4 @@
-#version 430 core
+#version 310 es
 
 #define BC1_DITHER
 #include "bc1.glsl"
diff --git a/bin/Data/bc4.glsl b/bin/Data/bc4.glsl
@@ -1,4 +1,12 @@
-#version 430 core
+#version 310 es
+
+#if defined(GL_ES) && GL_ES == 1
+	// Desktop GLSL allows the const keyword for either compile-time or
+	// run-time constants. GLSL ES only allows the keyword for compile-time
+	// constants. Since we use const on run-time constants, define it to
+	// nothing.
+	#define const
+#endif
 
 // #include "/media/matias/Datos/SyntaxHighlightingMisc.h"
 
@@ -8,14 +16,14 @@
 shared float2 g_minMaxValues[4u * 4u * 4u];
 shared uint2 g_mask[4u * 4u];
 
-layout( location = 0 ) uniform float2 params;
+layout( location = 0 ) uniform uint2 params;
 
 #define p_channelIdx params.x
 #define p_useSNorm params.y
 
 uniform sampler2D srcTex;
 
-layout( rg32ui ) uniform restrict writeonly uimage2D dstTexture;
+layout( rgba16ui ) uniform restrict writeonly mediump uimage2D dstTexture;
 
 layout( local_size_x = 4,  //
 		local_size_y = 4,  //
@@ -47,7 +55,7 @@ void main()
 		const uint2 pixelsToLoad = pixelsToLoadBase + uint2( i, blockThreadId );
 
 		const float4 value = OGRE_Load2D( srcTex, int2( pixelsToLoad ), 0 ).xyzw;
-		srcPixel[i] = p_channelIdx == 0 ? value.x : ( p_channelIdx == 1 ? value.y : value.w );
+		srcPixel[i] = p_channelIdx == 0u ? value.x : ( p_channelIdx == 1u ? value.y : value.w );
 		srcPixel[i] *= 255.0f;
 	}
 
@@ -77,8 +85,8 @@ void main()
 	float dist = maxVal - minVal;
 	float dist4 = dist * 4.0f;
 	float dist2 = dist * 2.0f;
-	float bias = ( dist < 8 ) ? ( dist - 1 ) : ( trunc( dist * 0.5f ) + 2 );
-	bias -= minVal * 7;
+	float bias = ( dist < 8.0f ) ? ( dist - 1.0f ) : ( trunc( dist * 0.5f ) + 2.0f );
+	bias -= minVal * 7.0f;
 
 	uint mask0 = 0u, mask1 = 0u;
 
@@ -134,9 +142,9 @@ void main()
 	if( blockThreadId == 0u )
 	{
 		// Save data
-		uint2 outputBytes;
+		uint4 outputBytes;
 
-		if( p_useSNorm != 0.0f )
+		if( p_useSNorm != 0u )
 		{
 			outputBytes.x =
 				packSnorm4x8( float4( maxVal * ( 1.0f / 255.0f ) * 2.0f - 1.0f,
@@ -147,10 +155,11 @@ void main()
 			outputBytes.x = packUnorm4x8(
 				float4( maxVal * ( 1.0f / 255.0f ), minVal * ( 1.0f / 255.0f ), 0.0f, 0.0f ) );
 		}
-		outputBytes.x |= g_mask[maskIdxBase].x;
-		outputBytes.y = g_mask[maskIdxBase].y;
+		outputBytes.y = g_mask[maskIdxBase].x >> 16u;
+		outputBytes.z = g_mask[maskIdxBase].y & 0xFFFFu;
+		outputBytes.w = g_mask[maskIdxBase].y >> 16u;
 
 		uint2 dstUV = gl_GlobalInvocationID.yz;
-		imageStore( dstTexture, int2( dstUV ), uint4( outputBytes.xy, 0u, 0u ) );
+		imageStore( dstTexture, int2( dstUV ), outputBytes );
 	}
 }
diff --git a/bin/Data/etc2_rgba_stitch.glsl b/bin/Data/etc2_rgba_stitch.glsl
@@ -2,7 +2,7 @@
 // This compute shader merely stitches them together to form the final result
 // It's also used by RG11 driver to stitch two R11 into one RG11
 
-#version 430 core
+#version 310 es
 
 // #include "/media/matias/Datos/SyntaxHighlightingMisc.h"
 
@@ -13,9 +13,9 @@ layout( local_size_x = 8,  //
 		local_size_y = 8,  //
 		local_size_z = 1 ) in;
 
-layout( binding = 0 ) uniform usampler2D srcRGB;
-layout( binding = 1 ) uniform usampler2D srcAlpha;
-layout( rgba32ui ) uniform restrict writeonly uimage2D dstTexture;
+layout( binding = 0 ) uniform highp usampler2D srcRGB;
+layout( binding = 1 ) uniform highp usampler2D srcAlpha;
+layout( rgba32ui ) uniform restrict writeonly highp uimage2D dstTexture;
 
 void main()
 {

diff --git a/include/betsy/EncoderGL.h b/include/betsy/EncoderGL.h
@@ -11,6 +11,7 @@ namespace betsy
 	{
 		PFG_RGBA32_UINT,
 		PFG_RGBA32_FLOAT,
+		PFG_RGBA16_UINT,
 		PFG_RGBA16_FLOAT,
 		PFG_R32_FLOAT,
 		PFG_RG32_UINT,

diff --git a/src/PlatformGL.cpp b/src/PlatformGL.cpp
@@ -69,12 +69,20 @@ namespace betsy
 
 		g_glContext = SDL_GL_CreateContext( g_sdlWindow );
 
+		// SDL_GL_ExtensionSupported() needs a current GL context; skip the query when creation failed.
+		const SDL_bool has_required_extensions = g_glContext ? SDL_GL_ExtensionSupported( "GL_ARB_ES3_1_compatibility" ) : SDL_FALSE;
 		if( !g_glContext )
 		{
 			fprintf( stderr, "GL Context creation failed.\n" );
 			SDL_Quit();
 			abort();
 		}
+		else if ( has_required_extensions == SDL_FALSE )
+		{
+			fprintf( stderr, "GL Context lacks required extensions.\n" );
+			SDL_Quit();
+			abort();
+		}
 		else
 		{
 			printf( "GL Context creation suceeded.\n" );

diff --git a/src/betsy/CpuImage.cpp b/src/betsy/CpuImage.cpp
@@ -167,6 +167,7 @@ namespace betsy
 		case PFG_RGBA32_UINT:
 		case PFG_RGBA32_FLOAT:
 			return 4u * 4u;
+		case PFG_RGBA16_UINT:
 		case PFG_RGBA16_FLOAT:
 			return 2u * 4u;
 		case PFG_R32_FLOAT:

diff --git a/src/betsy/EncoderBC1.cpp b/src/betsy/EncoderBC1.cpp
@@ -116,7 +116,7 @@ namespace betsy
 	{
 		bindTexture( 0u, m_srcTexture );
 		bindComputePso( m_bc1Pso );
-		bindUav( 0u, m_bc1TargetRes, PFG_RG32_UINT, ResourceAccess::Write );
+		bindUav( 0u, m_bc1TargetRes, PFG_RGBA16_UINT, ResourceAccess::Write );
 		bindUavBuffer( 1u, m_bc1TablesSsbo, 0u, sizeof( Bc1Tables ) );
 
 		glUniform1ui( 0, 2u );
@@ -128,10 +128,10 @@ namespace betsy
 		{
 			// Compress Alpha too (using BC4)
 			bindComputePso( m_bc4Pso );
-			bindUav( 0u, m_bc4TargetRes, PFG_RG32_UINT, ResourceAccess::Write );
+			bindUav( 0u, m_bc4TargetRes, PFG_RGBA16_UINT, ResourceAccess::Write );
 
 			// p_channelIdx, p_useSNorm
-			glUniform2f( 0, 3.0f, 0.0f );
+			glUniform2ui( 0, 3u, 0u );
 
 			glDispatchCompute( 1u,  //
 							   alignToNextMultiple( m_width, 16u ) / 16u,

diff --git a/src/betsy/EncoderBC4.cpp b/src/betsy/EncoderBC4.cpp
@@ -107,10 +107,10 @@ namespace betsy
 		const size_t numChannels = m_bc4TargetRes[1] ? 2u : 1u;
 		for( size_t i = 0u; i < numChannels; ++i )
 		{
-			bindUav( 0u, m_bc4TargetRes[i], PFG_RG32_UINT, ResourceAccess::Write );
+			bindUav( 0u, m_bc4TargetRes[i], PFG_RGBA16_UINT, ResourceAccess::Write );
 
 			// p_channelIdx, p_useSNorm
-			glUniform2f( 0, i == 0u ? 0.0f : 1.0f, m_encodeSNorm ? 1.0f : 0.0f );
+			glUniform2ui( 0, static_cast<GLuint>( i ), m_encodeSNorm ? 1u : 0u );
 
 			glDispatchCompute( 1u,  //
 							   alignToNextMultiple( m_width, 16u ) / 16u,

diff --git a/src/betsy/EncoderGL.cpp b/src/betsy/EncoderGL.cpp
@@ -60,6 +60,8 @@ namespace betsy
 			return GL_RGBA32UI;
 		case PFG_RGBA32_FLOAT:
 			return GL_RGBA32F;
+		case PFG_RGBA16_UINT:
+			return GL_RGBA16UI;
 		case PFG_RGBA16_FLOAT:
 			return GL_RGBA16F;
 		case PFG_R32_FLOAT:
@@ -104,6 +106,7 @@ namespace betsy
 		{
 		case PFG_RGBA32_UINT:
 		case PFG_RGBA32_FLOAT:
+		case PFG_RGBA16_UINT:
 		case PFG_RGBA16_FLOAT:
 		case PFG_RGBA8_UNORM:
 		case PFG_RGBA8_UNORM_SRGB:
@@ -134,6 +137,7 @@ namespace betsy
 		switch( pixelFormat )
 		{
 		case PFG_RGBA32_UINT:
+		case PFG_RGBA16_UINT:
 			format = GL_RGBA_INTEGER;
 			break;
 		case PFG_R32_FLOAT:
@@ -180,6 +184,9 @@ namespace betsy
 		case PFG_RG32_UINT:
 			type = GL_UNSIGNED_INT;
 			break;
+		case PFG_RGBA16_UINT:
+			type = GL_UNSIGNED_SHORT;
+			break;
 		case PFG_RGBA8_UNORM:
 		case PFG_RGBA8_UNORM_SRGB:
 			type = GL_UNSIGNED_INT_8_8_8_8_REV;