Fix SIMD recognition at least with GCC targets. Still need to test with MSVC

This commit is contained in:
Pavel Krajcevski 2012-08-25 12:58:20 -04:00
parent efdca4b5bb
commit d68a119bc9
4 changed files with 48 additions and 19 deletions

View file

@ -1,24 +1,44 @@
# NOTE(review): this span is a rendered diff hunk (see the "@ -1,24 +1,44 @"
# header above it), so lines removed by the commit and lines added by it appear
# to be interleaved. The comments below describe each command as written.

# Add the BPTCEncoder include directories from the source tree and from the
# binary tree to the header search path.
INCLUDE_DIRECTORIES(${TexC_SOURCE_DIR}/BPTCEncoder/include)
INCLUDE_DIRECTORIES(${TexC_BINARY_DIR}/BPTCEncoder/include)
# Modules providing CHECK_CXX_SOURCE_COMPILES and CHECK_CXX_SOURCE_RUNS.
INCLUDE(CheckCXXSourceCompiles)
INCLUDE(CheckCXXSourceRuns)
# Remember the caller's CMAKE_REQUIRED_FLAGS; it is restored after the probes.
SET(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
# SIMD probes for GCC. The MSVC branch further down is still a placeholder.
IF(CMAKE_COMPILER_IS_GNUCC)
SET(CMAKE_REQUIRED_FLAGS -msse4.1)
# NOTE(review): CHECK_CXX_SOURCE_COMPILES expects source code as its first
# argument, but a file path is passed here, and the result variable is the same
# HAS_SSE_41 that the run check below uses. Presumably this is the pre-commit
# line shown by the diff -- confirm it is not present in the real file.
CHECK_CXX_SOURCE_COMPILES("config/testsse4.1.cpp" HAS_SSE_41)
# NOTE(review): -E is GCC's preprocess-only switch, and the unquoted SET stores
# the two flags as a list. Verify both are intended for a check that has to
# link and run an executable.
SET(CMAKE_REQUIRED_FLAGS -msse4.1 -E)
# Build and run a program that blends two vectors with _mm_blend_ps; the outcome
# is stored in HAS_SSE_41.
# NOTE(review): CHECK_CXX_SOURCE_RUNS counts only an exit code of 0 as success,
# while this program returns the bit pattern of the first lane of the result --
# confirm that this yields exit status 0 on the platforms of interest.
CHECK_CXX_SOURCE_RUNS("
#include <smmintrin.h>
int main() {
const __m128 fv = _mm_set1_ps(1.0f);
const __m128 fv2 = _mm_set1_ps(2.0f);
const __m128 ans = _mm_blend_ps(fv, fv2, 2);
return ((int *)(&ans))[0];
}"
HAS_SSE_41
)
# POPCNT is probed only when the SSE4.1 check succeeded.
IF(HAS_SSE_41)
SET(CMAKE_REQUIRED_FLAGS -msse4.2)
# NOTE(review): same concern as the SSE4.1 compile check above -- a path is
# passed where source code is expected, and HAS_SSE_POPCNT is reused by the run
# check that follows.
CHECK_CXX_SOURCE_COMPILES("config/testsse4.2.cpp" HAS_SSE_POPCNT)
# Build and run a program that calls _mm_popcnt_u32; the outcome is stored in
# HAS_SSE_POPCNT.
# NOTE(review): the program returns the population count of 5, which is nonzero,
# whereas CHECK_CXX_SOURCE_RUNS treats only exit code 0 as success -- verify
# that this probe can ever report POPCNT as available.
CHECK_CXX_SOURCE_RUNS("
#include <smmintrin.h>
int main() {
const unsigned int testMe = 5;
return _mm_popcnt_u32(testMe);
}"
HAS_SSE_POPCNT
)
ENDIF(HAS_SSE_41)
ELSEIF(MSVC)
#!FIXME! No SIMD probes are run for MSVC yet, so neither HAS_SSE_41 nor
# HAS_SSE_POPCNT is set by this branch.
ENDIF()
# Put back the CMAKE_REQUIRED_FLAGS value saved before the probes.
SET(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
# Generate BC7Config.h from the config/BC7Config.h.in template.
# NOTE(review): CONFIGURE_FILE takes one input and one output path; the two
# output paths below look like the removed and the added version of the same
# diff line -- confirm which one the real file contains.
CONFIGURE_FILE(
"config/BC7Config.h.in"
"src/BC7Config.h"
"include/BC7Config.h"
)
IF(CMAKE_COMPILER_IS_GNUCC)
@ -38,6 +58,21 @@ SET( SOURCES
)
IF( HAS_SSE_41 )
# Hand the SIMD instruction-set switch to the compiler. The SSE4.2 spelling is
# used when HAS_SSE_POPCNT is set, the SSE4.1 spelling otherwise; MSVC gets the
# /arch: form and every other compiler the GCC-style -m form.
IF( MSVC )
  IF( HAS_SSE_POPCNT )
    ADD_DEFINITIONS( /arch:SSE4.2 )
  ELSE()
    ADD_DEFINITIONS( /arch:SSE4.1 )
  ENDIF()
ELSE() # Anything that is not MSVC is assumed to accept GCC flags
  IF( HAS_SSE_POPCNT )
    ADD_DEFINITIONS( -msse4.2 )
  ELSE()
    ADD_DEFINITIONS( -msse4.1 )
  ENDIF()
ENDIF()
SET( HEADERS
${HEADERS}
src/RGBAEndpointsSIMD.h
@ -52,6 +87,6 @@ IF( HAS_SSE_41 )
ENDIF( HAS_SSE_41 )
# Define the BPTCEncoder library target from the accumulated header list, the
# regular source list and the SIMD-specific source list.
ADD_LIBRARY( BPTCEncoder ${HEADERS} ${SOURCES} ${SIMD_SOURCES} )

View file

@ -5,4 +5,4 @@
// explicitly by the CMake build process.
// Do we have the proper popcnt instruction defined?
#define HAS_SSE_POPCNT @HAS_SSE_POPCNT@
#cmakedefine HAS_SSE_POPCNT

View file

@ -1,10 +0,0 @@
#include <smmintrin.h>
// Minimal probe program that calls _mm_blend_ps from <smmintrin.h>.
// Its return value is the first 32-bit lane of the blended vector,
// reinterpreted as an int.
int main() {
// Two vectors with every lane set to 1.0f and 2.0f respectively.
const __m128 fv = _mm_set1_ps(1.0f);
const __m128 fv2 = _mm_set1_ps(2.0f);
// Blend the lanes of fv and fv2 according to the immediate mask 2.
const __m128 ans = _mm_blend_ps(fv, fv2, 2);
// Reinterpret the vector as ints and return lane 0 as the exit status.
return ((int *)(&ans))[0];
}

View file

@ -119,7 +119,7 @@ __m128i RGBAVectorSIMD::ToPixel(const __m128i &qmask) const {
// !SPEED! We should figure out a way to get rid of these scalar operations.
#ifdef HAS_SSE_POPCNT
const uint32 prec = _mm_popcnt32(((uint32 *)(&qmask))[0]);
const uint32 prec = _mm_popcnt_u32(((uint32 *)(&qmask))[0]);
#else
const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]);
#endif
@ -160,7 +160,7 @@ __m128i RGBAVectorSIMD::ToPixel(const __m128i &qmask, const int pBit) const {
// !SPEED! We should figure out a way to get rid of these scalar operations.
#ifdef HAS_SSE_POPCNT
const uint32 prec = _mm_popcnt32(((uint32 *)(&qmask))[0]);
const uint32 prec = _mm_popcnt_u32(((uint32 *)(&qmask))[0]);
#else
const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]);
#endif
@ -283,7 +283,11 @@ float RGBAClusterSIMD::QuantizedError(const RGBAVectorSIMD &p1, const RGBAVector
// nBuckets should be a power of two.
assert(!(nBuckets & (nBuckets - 1)));
#ifdef HAS_SSE_POPCNT
const uint8 indexPrec = 8-_mm_popcnt_u32(~(nBuckets - 1) & 0xFF);
#else
const uint8 indexPrec = 8-popcnt32(~(nBuckets - 1) & 0xFF);
#endif
assert(indexPrec >= 2 && indexPrec <= 4);
typedef __m128i tInterpPair[2];