165 lines
6 KiB
Diff
165 lines
6 KiB
Diff
|
diff --git a/thirdparty/etcpak/ProcessRGB.cpp b/thirdparty/etcpak/ProcessRGB.cpp
|
||
|
index 4dc3bf23af..0caa687bc6 100644
|
||
|
--- a/thirdparty/etcpak/ProcessRGB.cpp
|
||
|
+++ b/thirdparty/etcpak/ProcessRGB.cpp
|
||
|
@@ -4181,3 +4181,145 @@ void CompressEtc2Rgba( const uint32_t* src, uint64_t* dst, uint32_t blocks, size
|
||
|
}
|
||
|
while( --blocks );
|
||
|
}
|
||
|
+
|
||
|
+// -- GODOT start --
|
||
|
+void CompressEtc2R8( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
|
||
|
+{
|
||
|
+ int w = 0;
|
||
|
+ uint8_t r[4*4];
|
||
|
+ do
|
||
|
+ {
|
||
|
+#ifdef __SSE4_1__
|
||
|
+ __m128 px0 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 0 ) ) );
|
||
|
+ __m128 px1 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 1 ) ) );
|
||
|
+ __m128 px2 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 2 ) ) );
|
||
|
+ __m128 px3 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 3 ) ) );
|
||
|
+
|
||
|
+ _MM_TRANSPOSE4_PS( px0, px1, px2, px3 );
|
||
|
+
|
||
|
+ __m128i c0 = _mm_castps_si128( px0 );
|
||
|
+ __m128i c1 = _mm_castps_si128( px1 );
|
||
|
+ __m128i c2 = _mm_castps_si128( px2 );
|
||
|
+ __m128i c3 = _mm_castps_si128( px3 );
|
||
|
+
|
||
|
+ __m128i mask = _mm_setr_epi32( 0x0e0a0602, -1, -1, -1 );
|
||
|
+
|
||
|
+ __m128i a0 = _mm_shuffle_epi8( c0, mask );
|
||
|
+ __m128i a1 = _mm_shuffle_epi8( c1, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 3, 3, 0, 3 ) ) );
|
||
|
+ __m128i a2 = _mm_shuffle_epi8( c2, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 3, 0, 3, 3 ) ) );
|
||
|
+ __m128i a3 = _mm_shuffle_epi8( c3, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 0, 3, 3, 3 ) ) );
|
||
|
+
|
||
|
+ __m128i s0 = _mm_or_si128( a0, a1 );
|
||
|
+ __m128i s1 = _mm_or_si128( a2, a3 );
|
||
|
+ __m128i s2 = _mm_or_si128( s0, s1 );
|
||
|
+
|
||
|
+ _mm_store_si128( (__m128i*)r, s2 );
|
||
|
+
|
||
|
+ src += 4;
|
||
|
+#else
|
||
|
+ auto ptr8 = r;
|
||
|
+ for( int x=0; x<4; x++ )
|
||
|
+ {
|
||
|
+ auto v = *src;
|
||
|
+ *ptr8++ = (v & 0xff0000) >> 16;
|
||
|
+ src += width;
|
||
|
+ v = *src;
|
||
|
+ *ptr8++ = (v & 0xff0000) >> 16;
|
||
|
+ src += width;
|
||
|
+ v = *src;
|
||
|
+ *ptr8++ = (v & 0xff0000) >> 16;
|
||
|
+ src += width;
|
||
|
+ v = *src;
|
||
|
+ *ptr8++ = (v & 0xff0000) >> 16;
|
||
|
+ src -= width * 3 - 1;
|
||
|
+ }
|
||
|
+#endif
|
||
|
+ if( ++w == width/4 )
|
||
|
+ {
|
||
|
+ src += width * 3;
|
||
|
+ w = 0;
|
||
|
+ }
|
||
|
+ *dst++ = ProcessAlpha_ETC2( r );
|
||
|
+ }
|
||
|
+ while( --blocks );
|
||
|
+}
|
||
|
+
|
||
|
+void CompressEtc2RG8( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
|
||
|
+{
|
||
|
+ int w = 0;
|
||
|
+ uint8_t rg[4*4*2];
|
||
|
+ do
|
||
|
+ {
|
||
|
+#ifdef __SSE4_1__
|
||
|
+ __m128 px0 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 0 ) ) );
|
||
|
+ __m128 px1 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 1 ) ) );
|
||
|
+ __m128 px2 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 2 ) ) );
|
||
|
+ __m128 px3 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 3 ) ) );
|
||
|
+
|
||
|
+ _MM_TRANSPOSE4_PS( px0, px1, px2, px3 );
|
||
|
+
|
||
|
+ __m128i c0 = _mm_castps_si128( px0 );
|
||
|
+ __m128i c1 = _mm_castps_si128( px1 );
|
||
|
+ __m128i c2 = _mm_castps_si128( px2 );
|
||
|
+ __m128i c3 = _mm_castps_si128( px3 );
|
||
|
+
|
||
|
+ __m128i mask = _mm_setr_epi32( 0x0e0a0602, -1, -1, -1 );
|
||
|
+
|
||
|
+ __m128i r0 = _mm_shuffle_epi8( c0, mask );
|
||
|
+ __m128i r1 = _mm_shuffle_epi8( c1, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 3, 3, 0, 3 ) ) );
|
||
|
+ __m128i r2 = _mm_shuffle_epi8( c2, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 3, 0, 3, 3 ) ) );
|
||
|
+ __m128i r3 = _mm_shuffle_epi8( c3, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 0, 3, 3, 3 ) ) );
|
||
|
+
|
||
|
+ __m128i s0 = _mm_or_si128( r0, r1 );
|
||
|
+ __m128i s1 = _mm_or_si128( r2, r3 );
|
||
|
+ __m128i s2 = _mm_or_si128( s0, s1 );
|
||
|
+
|
||
|
+ _mm_store_si128( (__m128i*)rg, s2 );
|
||
|
+
|
||
|
+ mask = _mm_setr_epi32( 0x0d090501, -1, -1, -1 );
|
||
|
+
|
||
|
+ r0 = _mm_shuffle_epi8( c0, mask );
|
||
|
+ r1 = _mm_shuffle_epi8( c1, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 3, 3, 0, 3 ) ) );
|
||
|
+ r2 = _mm_shuffle_epi8( c2, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 3, 0, 3, 3 ) ) );
|
||
|
+ r3 = _mm_shuffle_epi8( c3, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 0, 3, 3, 3 ) ) );
|
||
|
+
|
||
|
+ s0 = _mm_or_si128( r0, r1 );
|
||
|
+ s1 = _mm_or_si128( r2, r3 );
|
||
|
+ s2 = _mm_or_si128( s0, s1 );
|
||
|
+
|
||
|
+ _mm_store_si128( (__m128i*)&rg[16], s2 );
|
||
|
+ src += 4;
|
||
|
+#else
|
||
|
+ auto ptrr = rg;
|
||
|
+ auto ptrg = ptrr + 16;
|
||
|
+ for( int x=0; x<4; x++ )
|
||
|
+ {
|
||
|
+ auto v = *src;
|
||
|
+ *ptrr++ = (v & 0xff0000) >> 16;
|
||
|
+ *ptrg++ = (v & 0xff00) >> 8;
|
||
|
+ src += width;
|
||
|
+ v = *src;
|
||
|
+ *ptrr++ = (v & 0xff0000) >> 16;
|
||
|
+ *ptrg++ = (v & 0xff00) >> 8;
|
||
|
+ src += width;
|
||
|
+ v = *src;
|
||
|
+ *ptrr++ = (v & 0xff0000) >> 16;
|
||
|
+ *ptrg++ = (v & 0xff00) >> 8;
|
||
|
+ src += width;
|
||
|
+ v = *src;
|
||
|
+ *ptrr++ = (v & 0xff0000) >> 16;
|
||
|
+ *ptrg++ = (v & 0xff00) >> 8;
|
||
|
+ src -= width * 3 - 1;
|
||
|
+ }
|
||
|
+#endif
|
||
|
+ if( ++w == width/4 )
|
||
|
+ {
|
||
|
+ src += width * 3;
|
||
|
+ w = 0;
|
||
|
+ }
|
||
|
+ *dst++ = ProcessAlpha_ETC2( rg );
|
||
|
+ *dst++ = ProcessAlpha_ETC2( &rg[16] );
|
||
|
+ }
|
||
|
+ while( --blocks );
|
||
|
+}
|
||
|
+// -- GODOT end --
|
||
|
diff --git a/thirdparty/etcpak/ProcessRGB.hpp b/thirdparty/etcpak/ProcessRGB.hpp
|
||
|
index 043b46e636..050ea42562 100644
|
||
|
--- a/thirdparty/etcpak/ProcessRGB.hpp
|
||
|
+++ b/thirdparty/etcpak/ProcessRGB.hpp
|
||
|
@@ -9,5 +9,8 @@ void CompressEtc1Rgb( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_
|
||
|
void CompressEtc1RgbDither( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
|
||
|
void CompressEtc2Rgb( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width, bool useHeuristics );
|
||
|
void CompressEtc2Rgba( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width, bool useHeuristics );
|
||
|
-
|
||
|
+// -- GODOT start --
|
||
|
+void CompressEtc2R8( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width ); // Single channel: writes one 64-bit word to dst per 4x4 tile; width is in pixels.
|
||
|
+void CompressEtc2RG8( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width ); // Two channels: writes two 64-bit words to dst per 4x4 tile; width is in pixels.
|
||
|
+// -- GODOT end --
|
||
|
#endif
|