diff --git a/BPTCEncoder/include/BC7Compressor.h b/BPTCEncoder/include/BC7Compressor.h
index 3ad9dae..a80ca66 100755
--- a/BPTCEncoder/include/BC7Compressor.h
+++ b/BPTCEncoder/include/BC7Compressor.h
@@ -123,6 +123,24 @@ namespace BC7C
   void CompressImageBC7SIMD(const unsigned char* inBuf, unsigned char* outBuf, unsigned int width, unsigned int height);
 #endif
 
+#ifdef HAS_ATOMICS
+  // This is a threadsafe version of the compression function. A single call compresses the entire image.
+  // If the function is called from several threads at once with the same arguments, the threads divide the
+  // blocks of the image among themselves, and the thread that is handed the index one past the last block
+  // resets the shared state so that the function can be used again.
+  //
+  // The implementation reads 64 bytes of inBuf and writes 16 bytes of outBuf for each block of 16 pixels,
+  // and it derives the number of blocks from the caller's own width and height, so all concurrent callers
+  // are expected to pass the same dimensions.
+  //
+  // The function should be used as follows:
+  //   for(int i = 0; i < NTHREADS; i++) {
+  //     startThread(function, args);
+  //   }
+  //   join_threads();
+  void CompressImageBC7Atomic(const unsigned char *inBuf, unsigned char *outBuf, unsigned int width, unsigned int height);
+#endif
+
   // Decompress the image given as BC7 data to R8G8B8A8 format. Width and Height are the dimensions of the image in pixels.
   void DecompressImageBC7(const unsigned char* inBuf, unsigned char* outBuf, unsigned int width, unsigned int height);
 }
diff --git a/BPTCEncoder/src/BC7Compressor.cpp b/BPTCEncoder/src/BC7Compressor.cpp
index 9ba9e6b..61e8eed 100755
--- a/BPTCEncoder/src/BC7Compressor.cpp
+++ b/BPTCEncoder/src/BC7Compressor.cpp
@@ -1531,6 +1531,139 @@ namespace BC7C
     }
   }
 
+#ifdef HAS_ATOMICS
+  // Thin wrappers around the atomic primitives of the host compiler. Both variants must behave
+  // the same way: TestAndSet and FetchAndAdd return the value stored BEFORE the operation.
+#ifdef HAS_MSVC_ATOMICS
+  // Atomically stores 1 in *x and returns the previous value.
+  static uint32 TestAndSet(uint32 *x) {
+    return InterlockedExchange(x, 1);
+  }
+
+  // Atomically increments *x and returns the value it held before the increment.
+  // InterlockedIncrement returns the incremented value, so one is subtracted to
+  // match __sync_fetch_and_add in the GCC variant.
+  static uint32 FetchAndAdd(uint32 *x) {
+    return InterlockedIncrement(x) - 1;
+  }
+
+  // Clears a flag set by TestAndSet. An interlocked exchange is used rather than a
+  // plain store so that the release also acts as a memory barrier.
+  static void ResetTestAndSet(uint32 *x) {
+    InterlockedExchange(x, 0);
+  }
+
+  // Full memory barrier: keeps the stores around it in program order.
+  static void FullMemoryBarrier() {
+    MemoryBarrier();
+  }
+#elif defined HAS_GCC_ATOMICS
+  // Atomically stores 1 in *x and returns the previous value.
+  static uint32 TestAndSet(uint32 *x) {
+    return __sync_lock_test_and_set(x, 1);
+  }
+
+  // Atomically increments *x and returns the value it held before the increment.
+  static uint32 FetchAndAdd(uint32 *x) {
+    return __sync_fetch_and_add(x, 1);
+  }
+
+  // Clears a flag set by TestAndSet.
+  static void ResetTestAndSet(uint32 *x) {
+    __sync_lock_release(x);
+  }
+
+  // Full memory barrier: keeps the stores around it in program order.
+  static void FullMemoryBarrier() {
+    __sync_synchronize();
+  }
+#endif
+
+  // Variables used for synchronization in threadsafe implementation.
+  static ALIGN(32) uint32 _currentBlock = 0;   // Index of the next block to hand out.
+  static ALIGN(32) uint32 _initialized = 0;    // Test-and-set lock, nonzero while an image is claimed.
+  static const unsigned char *_inBuf;          // Input of the image that is being compressed.
+  static unsigned char *_outBuf;               // Output of the image that is being compressed.
+  static bool _initializedFlag = false;        // True once the lock owner has published the fields above.
+
+  // Compresses the image in inBuf into outBuf, sharing the work with every other thread that is
+  // currently inside this function. See BC7Compressor.h for the intended usage.
+  //
+  // NOTE(review): the number of blocks is computed from this caller's width and height even while
+  // it helps with an image that was claimed by another caller, so concurrent callers are expected
+  // to pass images of the same size.
+  void CompressImageBC7Atomic(
+    const unsigned char *inBuf,
+    unsigned char *outBuf,
+    unsigned int width,
+    unsigned int height
+  ) {
+
+    bool myData = false;
+    while(!myData) {
+
+      // Have we initialized any data?
+      if(!TestAndSet(&_initialized)) {
+
+        // I'm the first one here... initialize MY data...
+
+        const int kMaxIters = BC7CompressionMode::kMaxAnnealingIterations;
+        BC7CompressionMode::MaxAnnealingIterations = min(kMaxIters, GetQualityLevel());
+
+        // Publish the buffers before rewinding the block counter. A helper may claim a block
+        // as soon as the counter is reset, and it must not find the buffers of the previous
+        // image when it does.
+        _inBuf = inBuf;
+        _outBuf = outBuf;
+        FullMemoryBarrier();
+
+        _currentBlock = 0;
+        FullMemoryBarrier();
+
+        myData = true;
+        _initializedFlag = true;
+      }
+
+      // We've initialized data... is it mine?
+      else if(_inBuf == inBuf && _outBuf == outBuf) {
+        myData = true;
+      }
+
+      const uint32 nBlocks = (height * width) / 16;
+
+      // Make sure that whoever is initializing data is working on it...
+      while(!_initializedFlag && _currentBlock < nBlocks) {
+        YieldThread();
+      }
+
+      // Help finish whatever texture we're compressing before we start again on my work...
+      // Every block takes 64 bytes of input and produces 16 bytes of output.
+      uint32 blockIdx;
+      while((blockIdx = FetchAndAdd(&_currentBlock)) < nBlocks) {
+        unsigned char *out = _outBuf + (16 * blockIdx);
+        const unsigned char *in = _inBuf + (64 * blockIdx);
+
+        CompressBC7Block((const uint32 *)in, out);
+        YieldThread(); // Just to give other threads a chance to make some progress
+      }
+
+      // Only one thread is handed the index nBlocks, and by then every block has been handed
+      // out (other threads may still be compressing theirs). That thread resets the
+      // initialization...
+      if(blockIdx == nBlocks) {
+        _initializedFlag = false;
+        ResetTestAndSet(&_initialized);
+      }
+      else if(blockIdx > nBlocks) {
+        // Everyone else waits for the reset above before returning or trying again..
+        while(_initialized) {
+          YieldThread();
+        }
+      }
+    }
+  }
+#endif // HAS_ATOMICS
+
   void CompressImageBC7Stats(
     const unsigned char *inBuf,
     unsigned char *outBuf,