//------------------------------------------------------------------------------------- // DirectXTexConvert.cpp // // DirectX Texture Library - Image conversion // // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A // PARTICULAR PURPOSE. // // Copyright (c) Microsoft Corporation. All rights reserved. // // http://go.microsoft.com/fwlink/?LinkId=248926 //------------------------------------------------------------------------------------- #include "directxtexp.h" #ifdef USE_XNAMATH #if XNAMATH_VERSION < 205 #error This file requires XNAMATH v2.05 or later #endif #else using namespace DirectX::PackedVector; #endif namespace { inline float round_to_nearest( float x ) { // Round to nearest (even) float i = floorf(x); x -= i; if(x < 0.5f) return i; if(x > 0.5f) return i + 1.f; float int_part; modff( i / 2.f, &int_part ); if ( (2.f*int_part) == i ) { return i; } return i + 1.f; } }; namespace DirectX { static const XMVECTORF32 g_Grayscale = { 0.2125f, 0.7154f, 0.0721f, 0.0f }; //------------------------------------------------------------------------------------- // Copies an image row with optional clearing of alpha value to 1.0 // (can be used in place as well) otherwise copies the image row unmodified. //------------------------------------------------------------------------------------- void _CopyScanline(_When_(pDestination == pSource, _Inout_updates_bytes_(outSize)) _When_(pDestination != pSource, _Out_writes_bytes_(outSize)) LPVOID pDestination, _In_ size_t outSize, _In_reads_bytes_(inSize) LPCVOID pSource, _In_ size_t inSize, _In_ DXGI_FORMAT format, _In_ DWORD flags) { assert( pDestination && outSize > 0 ); assert( pSource && inSize > 0 ); assert( IsValid(format) && !IsVideo(format) ); if ( flags & TEXP_SCANLINE_SETALPHA ) { switch( format ) { //----------------------------------------------------------------------------- case DXGI_FORMAT_R32G32B32A32_TYPELESS: case DXGI_FORMAT_R32G32B32A32_FLOAT: case DXGI_FORMAT_R32G32B32A32_UINT: case DXGI_FORMAT_R32G32B32A32_SINT: { uint32_t alpha; if ( format == DXGI_FORMAT_R32G32B32A32_FLOAT ) alpha = 0x3f800000; else if ( format == DXGI_FORMAT_R32G32B32A32_SINT ) alpha = 0x7fffffff; else alpha = 0xffffffff; if ( pDestination == pSource ) { uint32_t *dPtr = reinterpret_cast(pDestination); for( size_t count = 0; count < outSize; count += 16 ) { dPtr += 3; *(dPtr++) = alpha; } } else { const uint32_t * __restrict sPtr = reinterpret_cast(pSource); uint32_t * __restrict dPtr = reinterpret_cast(pDestination); size_t size = std::min( outSize, inSize ); for( size_t count = 0; count < size; count += 16 ) { *(dPtr++) = *(sPtr++); *(dPtr++) = *(sPtr++); *(dPtr++) = *(sPtr++); *(dPtr++) = alpha; sPtr++; } } } return; //----------------------------------------------------------------------------- case DXGI_FORMAT_R16G16B16A16_TYPELESS: case DXGI_FORMAT_R16G16B16A16_FLOAT: case DXGI_FORMAT_R16G16B16A16_UNORM: case DXGI_FORMAT_R16G16B16A16_UINT: case DXGI_FORMAT_R16G16B16A16_SNORM: case DXGI_FORMAT_R16G16B16A16_SINT: { uint16_t alpha; if ( format == DXGI_FORMAT_R16G16B16A16_FLOAT ) alpha = 0x3c00; else if ( format == DXGI_FORMAT_R16G16B16A16_SNORM || format == DXGI_FORMAT_R16G16B16A16_SINT ) alpha = 0x7fff; else alpha = 0xffff; if ( pDestination == pSource ) { uint16_t *dPtr = reinterpret_cast(pDestination); for( size_t count = 0; count < outSize; count += 8 ) { dPtr += 3; *(dPtr++) = alpha; } } else { const uint16_t * __restrict sPtr = reinterpret_cast(pSource); uint16_t * __restrict dPtr = reinterpret_cast(pDestination); size_t size = std::min( outSize, inSize ); for( size_t count = 0; count < size; count += 8 ) { *(dPtr++) = *(sPtr++); *(dPtr++) = *(sPtr++); *(dPtr++) = *(sPtr++); *(dPtr++) = alpha; sPtr++; } } } return; //----------------------------------------------------------------------------- case DXGI_FORMAT_R10G10B10A2_TYPELESS: case DXGI_FORMAT_R10G10B10A2_UNORM: case DXGI_FORMAT_R10G10B10A2_UINT: case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM: if ( pDestination == pSource ) { uint32_t *dPtr = reinterpret_cast(pDestination); for( size_t count = 0; count < outSize; count += 4 ) { #pragma warning(suppress: 6001 6101) // PREFast doesn't properly understand the aliasing here. *dPtr |= 0xC0000000; ++dPtr; } } else { const uint32_t * __restrict sPtr = reinterpret_cast(pSource); uint32_t * __restrict dPtr = reinterpret_cast(pDestination); size_t size = std::min( outSize, inSize ); for( size_t count = 0; count < size; count += 4 ) { *(dPtr++) = *(sPtr++) | 0xC0000000; } } return; //----------------------------------------------------------------------------- case DXGI_FORMAT_R8G8B8A8_TYPELESS: case DXGI_FORMAT_R8G8B8A8_UNORM: case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: case DXGI_FORMAT_R8G8B8A8_UINT: case DXGI_FORMAT_R8G8B8A8_SNORM: case DXGI_FORMAT_R8G8B8A8_SINT: case DXGI_FORMAT_B8G8R8A8_UNORM: case DXGI_FORMAT_B8G8R8A8_TYPELESS: case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: { const uint32_t alpha = ( format == DXGI_FORMAT_R8G8B8A8_SNORM || format == DXGI_FORMAT_R8G8B8A8_SINT ) ? 0x7f000000 : 0xff000000; if ( pDestination == pSource ) { uint32_t *dPtr = reinterpret_cast(pDestination); for( size_t count = 0; count < outSize; count += 4 ) { uint32_t t = *dPtr & 0xFFFFFF; t |= alpha; *(dPtr++) = t; } } else { const uint32_t * __restrict sPtr = reinterpret_cast(pSource); uint32_t * __restrict dPtr = reinterpret_cast(pDestination); size_t size = std::min( outSize, inSize ); for( size_t count = 0; count < size; count += 4 ) { uint32_t t = *(sPtr++) & 0xFFFFFF; t |= alpha; *(dPtr++) = t; } } } return; //----------------------------------------------------------------------------- case DXGI_FORMAT_B5G5R5A1_UNORM: if ( pDestination == pSource ) { uint16_t *dPtr = reinterpret_cast(pDestination); for( size_t count = 0; count < outSize; count += 2 ) { *(dPtr++) |= 0x8000; } } else { const uint16_t * __restrict sPtr = reinterpret_cast(pSource); uint16_t * __restrict dPtr = reinterpret_cast(pDestination); size_t size = std::min( outSize, inSize ); for( size_t count = 0; count < size; count += 2 ) { *(dPtr++) = *(sPtr++) | 0x8000; } } return; //----------------------------------------------------------------------------- case DXGI_FORMAT_A8_UNORM: memset( pDestination, 0xff, outSize ); return; #ifdef DXGI_1_2_FORMATS //----------------------------------------------------------------------------- case DXGI_FORMAT_B4G4R4A4_UNORM: if ( pDestination == pSource ) { uint16_t *dPtr = reinterpret_cast(pDestination); for( size_t count = 0; count < outSize; count += 2 ) { *(dPtr++) |= 0xF000; } } else { const uint16_t * __restrict sPtr = reinterpret_cast(pSource); uint16_t * __restrict dPtr = reinterpret_cast(pDestination); size_t size = std::min( outSize, inSize ); for( size_t count = 0; count < size; count += 2 ) { *(dPtr++) = *(sPtr++) | 0xF000; } } return; #endif // DXGI_1_2_FORMATS } } // Fall-through case is to just use memcpy (assuming this is not an in-place operation) if ( pDestination == pSource ) return; size_t size = std::min( outSize, inSize ); memcpy_s( pDestination, outSize, pSource, size ); } //------------------------------------------------------------------------------------- // Swizzles (RGB <-> BGR) an image row with optional clearing of alpha value to 1.0 // (can be used in place as well) otherwise copies the image row unmodified. //------------------------------------------------------------------------------------- _Use_decl_annotations_ void _SwizzleScanline( LPVOID pDestination, size_t outSize, LPCVOID pSource, size_t inSize, DXGI_FORMAT format, DWORD flags ) { assert( pDestination && outSize > 0 ); assert( pSource && inSize > 0 ); assert( IsValid(format) && !IsVideo(format) ); switch( format ) { //--------------------------------------------------------------------------------- case DXGI_FORMAT_R10G10B10A2_TYPELESS: case DXGI_FORMAT_R10G10B10A2_UNORM: case DXGI_FORMAT_R10G10B10A2_UINT: case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM: if ( flags & TEXP_SCANLINE_LEGACY ) { // Swap Red (R) and Blue (B) channel (used for D3DFMT_A2R10G10B10 legacy sources) if ( pDestination == pSource ) { uint32_t *dPtr = reinterpret_cast(pDestination); for( size_t count = 0; count < outSize; count += 4 ) { #pragma warning(suppress: 6001 6101) // PREFast doesn't properly understand the aliasing here. uint32_t t = *dPtr; uint32_t t1 = (t & 0x3ff00000) >> 20; uint32_t t2 = (t & 0x000003ff) << 20; uint32_t t3 = (t & 0x000ffc00); uint32_t ta = ( flags & TEXP_SCANLINE_SETALPHA ) ? 0xC0000000 : (t & 0xC0000000); *(dPtr++) = t1 | t2 | t3 | ta; } } else { const uint32_t * __restrict sPtr = reinterpret_cast(pSource); uint32_t * __restrict dPtr = reinterpret_cast(pDestination); size_t size = std::min( outSize, inSize ); for( size_t count = 0; count < size; count += 4 ) { uint32_t t = *(sPtr++); uint32_t t1 = (t & 0x3ff00000) >> 20; uint32_t t2 = (t & 0x000003ff) << 20; uint32_t t3 = (t & 0x000ffc00); uint32_t ta = ( flags & TEXP_SCANLINE_SETALPHA ) ? 0xC0000000 : (t & 0xC0000000); *(dPtr++) = t1 | t2 | t3 | ta; } } return; } break; //--------------------------------------------------------------------------------- case DXGI_FORMAT_R8G8B8A8_TYPELESS: case DXGI_FORMAT_R8G8B8A8_UNORM: case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: case DXGI_FORMAT_B8G8R8A8_UNORM: case DXGI_FORMAT_B8G8R8X8_UNORM: case DXGI_FORMAT_B8G8R8A8_TYPELESS: case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: case DXGI_FORMAT_B8G8R8X8_TYPELESS: case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB: // Swap Red (R) and Blue (B) channels (used to convert from DXGI 1.1 BGR formats to DXGI 1.0 RGB) if ( pDestination == pSource ) { uint32_t *dPtr = reinterpret_cast(pDestination); for( size_t count = 0; count < outSize; count += 4 ) { uint32_t t = *dPtr; uint32_t t1 = (t & 0x00ff0000) >> 16; uint32_t t2 = (t & 0x000000ff) << 16; uint32_t t3 = (t & 0x0000ff00); uint32_t ta = ( flags & TEXP_SCANLINE_SETALPHA ) ? 0xff000000 : (t & 0xFF000000); *(dPtr++) = t1 | t2 | t3 | ta; } } else { const uint32_t * __restrict sPtr = reinterpret_cast(pSource); uint32_t * __restrict dPtr = reinterpret_cast(pDestination); size_t size = std::min( outSize, inSize ); for( size_t count = 0; count < size; count += 4 ) { uint32_t t = *(sPtr++); uint32_t t1 = (t & 0x00ff0000) >> 16; uint32_t t2 = (t & 0x000000ff) << 16; uint32_t t3 = (t & 0x0000ff00); uint32_t ta = ( flags & TEXP_SCANLINE_SETALPHA ) ? 0xff000000 : (t & 0xFF000000); *(dPtr++) = t1 | t2 | t3 | ta; } } return; } // Fall-through case is to just use memcpy (assuming this is not an in-place operation) if ( pDestination == pSource ) return; size_t size = std::min( outSize, inSize ); memcpy_s( pDestination, outSize, pSource, size ); } //------------------------------------------------------------------------------------- // Converts an image row with optional clearing of alpha value to 1.0 // Returns true if supported, false if expansion case not supported //------------------------------------------------------------------------------------- _Use_decl_annotations_ bool _ExpandScanline( LPVOID pDestination, size_t outSize, DXGI_FORMAT outFormat, LPCVOID pSource, size_t inSize, DXGI_FORMAT inFormat, DWORD flags ) { assert( pDestination && outSize > 0 ); assert( pSource && inSize > 0 ); assert( IsValid(outFormat) && !IsVideo(outFormat) ); assert( IsValid(inFormat) && !IsVideo(inFormat) ); switch( inFormat ) { case DXGI_FORMAT_B5G6R5_UNORM: if ( outFormat != DXGI_FORMAT_R8G8B8A8_UNORM ) return false; // DXGI_FORMAT_B5G6R5_UNORM -> DXGI_FORMAT_R8G8B8A8_UNORM { const uint16_t * __restrict sPtr = reinterpret_cast(pSource); uint32_t * __restrict dPtr = reinterpret_cast(pDestination); for( size_t ocount = 0, icount = 0; ((icount < inSize) && (ocount < outSize)); icount += 2, ocount += 4 ) { uint16_t t = *(sPtr++); uint32_t t1 = ((t & 0xf800) >> 8) | ((t & 0xe000) >> 13); uint32_t t2 = ((t & 0x07e0) << 5) | ((t & 0x0600) >> 5); uint32_t t3 = ((t & 0x001f) << 19) | ((t & 0x001c) << 14); *(dPtr++) = t1 | t2 | t3 | 0xff000000; } } return true; case DXGI_FORMAT_B5G5R5A1_UNORM: if ( outFormat != DXGI_FORMAT_R8G8B8A8_UNORM ) return false; // DXGI_FORMAT_B5G5R5A1_UNORM -> DXGI_FORMAT_R8G8B8A8_UNORM { const uint16_t * __restrict sPtr = reinterpret_cast(pSource); uint32_t * __restrict dPtr = reinterpret_cast(pDestination); for( size_t ocount = 0, icount = 0; ((icount < inSize) && (ocount < outSize)); icount += 2, ocount += 4 ) { uint16_t t = *(sPtr++); uint32_t t1 = ((t & 0x7c00) >> 7) | ((t & 0x7000) >> 12); uint32_t t2 = ((t & 0x03e0) << 6) | ((t & 0x0380) << 1); uint32_t t3 = ((t & 0x001f) << 19) | ((t & 0x001c) << 14); uint32_t ta = ( flags & TEXP_SCANLINE_SETALPHA ) ? 0xff000000 : ((t & 0x8000) ? 0xff000000 : 0); *(dPtr++) = t1 | t2 | t3 | ta; } } return true; #ifdef DXGI_1_2_FORMATS case DXGI_FORMAT_B4G4R4A4_UNORM: if ( outFormat != DXGI_FORMAT_R8G8B8A8_UNORM ) return false; // DXGI_FORMAT_B4G4R4A4_UNORM -> DXGI_FORMAT_R8G8B8A8_UNORM { const uint16_t * __restrict sPtr = reinterpret_cast(pSource); uint32_t * __restrict dPtr = reinterpret_cast(pDestination); for( size_t ocount = 0, icount = 0; ((icount < inSize) && (ocount < outSize)); icount += 2, ocount += 4 ) { uint16_t t = *(sPtr++); uint32_t t1 = ((t & 0x0f00) >> 4) | ((t & 0x0f00) >> 8); uint32_t t2 = ((t & 0x00f0) << 8) | ((t & 0x00f0) << 4); uint32_t t3 = ((t & 0x000f) << 20) | ((t & 0x000f) << 16); uint32_t ta = ( flags & TEXP_SCANLINE_SETALPHA ) ? 0xff000000 : (((t & 0xf000) << 16) | ((t & 0xf000) << 12)); *(dPtr++) = t1 | t2 | t3 | ta; } } return true; #endif // DXGI_1_2_FORMATS } return false; } //------------------------------------------------------------------------------------- // Loads an image row into standard RGBA XMVECTOR (aligned) array //------------------------------------------------------------------------------------- #define LOAD_SCANLINE( type, func )\ if ( size >= sizeof(type) )\ {\ const type * __restrict sPtr = reinterpret_cast(pSource);\ for( size_t icount = 0; icount < size; icount += sizeof(type) )\ {\ if ( dPtr >= ePtr ) break;\ *(dPtr++) = func( sPtr++ );\ }\ return true;\ }\ return false; #define LOAD_SCANLINE3( type, func, defvec )\ if ( size >= sizeof(type) )\ {\ const type * __restrict sPtr = reinterpret_cast(pSource);\ for( size_t icount = 0; icount < size; icount += sizeof(type) )\ {\ XMVECTOR v = func( sPtr++ );\ if ( dPtr >= ePtr ) break;\ *(dPtr++) = XMVectorSelect( defvec, v, g_XMSelect1110 );\ }\ return true;\ }\ return false; #define LOAD_SCANLINE2( type, func, defvec )\ if ( size >= sizeof(type) )\ {\ const type * __restrict sPtr = reinterpret_cast(pSource);\ for( size_t icount = 0; icount < size; icount += sizeof(type) )\ {\ XMVECTOR v = func( sPtr++ );\ if ( dPtr >= ePtr ) break;\ *(dPtr++) = XMVectorSelect( defvec, v, g_XMSelect1100 );\ }\ return true;\ }\ return false; _Use_decl_annotations_ bool _LoadScanline( XMVECTOR* pDestination, size_t count, LPCVOID pSource, size_t size, DXGI_FORMAT format ) { assert( pDestination && count > 0 && (((uintptr_t)pDestination & 0xF) == 0) ); assert( pSource && size > 0 ); assert( IsValid(format) && !IsVideo(format) && !IsTypeless(format, false) && !IsCompressed(format) ); XMVECTOR* __restrict dPtr = pDestination; if ( !dPtr ) return false; const XMVECTOR* ePtr = pDestination + count; switch( format ) { case DXGI_FORMAT_R32G32B32A32_FLOAT: { size_t msize = (size > (sizeof(XMVECTOR)*count)) ? (sizeof(XMVECTOR)*count) : size; memcpy_s( dPtr, sizeof(XMVECTOR)*count, pSource, msize ); } return true; case DXGI_FORMAT_R32G32B32A32_UINT: LOAD_SCANLINE( XMUINT4, XMLoadUInt4 ) case DXGI_FORMAT_R32G32B32A32_SINT: LOAD_SCANLINE( XMINT4, XMLoadSInt4 ) case DXGI_FORMAT_R32G32B32_FLOAT: LOAD_SCANLINE3( XMFLOAT3, XMLoadFloat3, g_XMIdentityR3 ) case DXGI_FORMAT_R32G32B32_UINT: LOAD_SCANLINE3( XMUINT3, XMLoadUInt3, g_XMIdentityR3 ) case DXGI_FORMAT_R32G32B32_SINT: LOAD_SCANLINE3( XMINT3, XMLoadSInt3, g_XMIdentityR3 ) case DXGI_FORMAT_R16G16B16A16_FLOAT: LOAD_SCANLINE( XMHALF4, XMLoadHalf4 ) case DXGI_FORMAT_R16G16B16A16_UNORM: LOAD_SCANLINE( XMUSHORTN4, XMLoadUShortN4 ) case DXGI_FORMAT_R16G16B16A16_UINT: LOAD_SCANLINE( XMUSHORT4, XMLoadUShort4 ) case DXGI_FORMAT_R16G16B16A16_SNORM: LOAD_SCANLINE( XMSHORTN4, XMLoadShortN4 ) case DXGI_FORMAT_R16G16B16A16_SINT: LOAD_SCANLINE( XMSHORT4, XMLoadShort4 ) case DXGI_FORMAT_R32G32_FLOAT: LOAD_SCANLINE2( XMFLOAT2, XMLoadFloat2, g_XMIdentityR3 ) case DXGI_FORMAT_R32G32_UINT: LOAD_SCANLINE2( XMUINT2, XMLoadUInt2, g_XMIdentityR3 ) case DXGI_FORMAT_R32G32_SINT: LOAD_SCANLINE2( XMINT2, XMLoadSInt2, g_XMIdentityR3 ) case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: if ( size >= (sizeof(float)+sizeof(uint32_t)) ) { const float * sPtr = reinterpret_cast(pSource); for( size_t icount = 0; icount < size; icount += (sizeof(float)+sizeof(uint32_t)) ) { const uint8_t* ps8 = reinterpret_cast( &sPtr[1] ); if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSet( sPtr[0], static_cast( *ps8 ), 0.f, 1.f ); sPtr += 2; } return true; } return false; case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS: if ( size >= (sizeof(float)+sizeof(uint32_t)) ) { const float * sPtr = reinterpret_cast(pSource); for( size_t icount = 0; icount < size; icount += (sizeof(float)+sizeof(uint32_t)) ) { if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSet( sPtr[0], 0.f /* typeless component assumed zero */, 0.f, 1.f ); sPtr += 2; } return true; } return false; case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT: if ( size >= (sizeof(float)+sizeof(uint32_t)) ) { const float * sPtr = reinterpret_cast(pSource); for( size_t icount = 0; icount < size; icount += (sizeof(float)+sizeof(uint32_t)) ) { const uint8_t* pg8 = reinterpret_cast( &sPtr[1] ); if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSet( 0.f /* typeless component assumed zero */, static_cast( *pg8 ), 0.f, 1.f ); sPtr += 2; } return true; } return false; case DXGI_FORMAT_R10G10B10A2_UNORM: LOAD_SCANLINE( XMUDECN4, XMLoadUDecN4 ); case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM: if ( size >= sizeof(XMUDECN4) ) { const XMUDECN4 * __restrict sPtr = reinterpret_cast(pSource); for( size_t icount = 0; icount < size; icount += sizeof(XMUDECN4) ) { if ( dPtr >= ePtr ) break; int32_t ElementX = sPtr->v & 0x3FF; int32_t ElementY = (sPtr->v >> 10) & 0x3FF; int32_t ElementZ = (sPtr->v >> 20) & 0x3FF; XMVECTORF32 vResult = { (float)(ElementX - 0x180) / 510.0f, (float)(ElementY - 0x180) / 510.0f, (float)(ElementZ - 0x180) / 510.0f, (float)(sPtr->v >> 30) / 3.0f }; ++sPtr; *(dPtr++) = vResult.v; } return true; } return false; case DXGI_FORMAT_R10G10B10A2_UINT: LOAD_SCANLINE( XMUDEC4, XMLoadUDec4 ); case DXGI_FORMAT_R11G11B10_FLOAT: LOAD_SCANLINE3( XMFLOAT3PK, XMLoadFloat3PK, g_XMIdentityR3 ); case DXGI_FORMAT_R8G8B8A8_UNORM: case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: LOAD_SCANLINE( XMUBYTEN4, XMLoadUByteN4 ) case DXGI_FORMAT_R8G8B8A8_UINT: LOAD_SCANLINE( XMUBYTE4, XMLoadUByte4 ) case DXGI_FORMAT_R8G8B8A8_SNORM: LOAD_SCANLINE( XMBYTEN4, XMLoadByteN4 ) case DXGI_FORMAT_R8G8B8A8_SINT: LOAD_SCANLINE( XMBYTE4, XMLoadByte4 ) case DXGI_FORMAT_R16G16_FLOAT: LOAD_SCANLINE2( XMHALF2, XMLoadHalf2, g_XMIdentityR3 ) case DXGI_FORMAT_R16G16_UNORM: LOAD_SCANLINE2( XMUSHORTN2, XMLoadUShortN2, g_XMIdentityR3 ) case DXGI_FORMAT_R16G16_UINT: LOAD_SCANLINE2( XMUSHORT2, XMLoadUShort2, g_XMIdentityR3 ) case DXGI_FORMAT_R16G16_SNORM: LOAD_SCANLINE2( XMSHORTN2, XMLoadShortN2, g_XMIdentityR3 ) case DXGI_FORMAT_R16G16_SINT: LOAD_SCANLINE2( XMSHORT2, XMLoadShort2, g_XMIdentityR3 ) case DXGI_FORMAT_D32_FLOAT: case DXGI_FORMAT_R32_FLOAT: if ( size >= sizeof(float) ) { const float* __restrict sPtr = reinterpret_cast(pSource); for( size_t icount = 0; icount < size; icount += sizeof(float) ) { XMVECTOR v = XMLoadFloat( sPtr++ ); if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSelect( g_XMIdentityR3, v, g_XMSelect1000 ); } return true; } return false; case DXGI_FORMAT_R32_UINT: if ( size >= sizeof(uint32_t) ) { const uint32_t* __restrict sPtr = reinterpret_cast(pSource); for( size_t icount = 0; icount < size; icount += sizeof(uint32_t) ) { XMVECTOR v = XMLoadInt( sPtr++ ); v = XMConvertVectorUIntToFloat( v, 0 ); if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSelect( g_XMIdentityR3, v, g_XMSelect1000 ); } return true; } return false; case DXGI_FORMAT_R32_SINT: if ( size >= sizeof(int32_t) ) { const int32_t * __restrict sPtr = reinterpret_cast(pSource); for( size_t icount = 0; icount < size; icount += sizeof(int32_t) ) { XMVECTOR v = XMLoadInt( reinterpret_cast (sPtr++) ); v = XMConvertVectorIntToFloat( v, 0 ); if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSelect( g_XMIdentityR3, v, g_XMSelect1000 ); } return true; } return false; case DXGI_FORMAT_D24_UNORM_S8_UINT: if ( size >= sizeof(uint32_t) ) { const uint32_t * sPtr = reinterpret_cast(pSource); for( size_t icount = 0; icount < size; icount += sizeof(uint32_t) ) { float d = static_cast( *sPtr & 0xFFFFFF ) / 16777215.f; float s = static_cast( ( *sPtr & 0xFF000000 ) >> 24 ); ++sPtr; if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSet( d, s, 0.f, 1.f ); } return true; } return false; case DXGI_FORMAT_R24_UNORM_X8_TYPELESS: if ( size >= sizeof(uint32_t) ) { const uint32_t * sPtr = reinterpret_cast(pSource); for( size_t icount = 0; icount < size; icount += sizeof(uint32_t) ) { float r = static_cast( *sPtr & 0xFFFFFF ) / 16777215.f; ++sPtr; if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSet( r, 0.f /* typeless component assumed zero */, 0.f, 1.f ); } return true; } return false; case DXGI_FORMAT_X24_TYPELESS_G8_UINT: if ( size >= sizeof(uint32_t) ) { const uint32_t * sPtr = reinterpret_cast(pSource); for( size_t icount = 0; icount < size; icount += sizeof(uint32_t) ) { float g = static_cast( ( *sPtr & 0xFF000000 ) >> 24 ); ++sPtr; if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSet( 0.f /* typeless component assumed zero */, g, 0.f, 1.f ); } return true; } return false; case DXGI_FORMAT_R8G8_UNORM: LOAD_SCANLINE2( XMUBYTEN2, XMLoadUByteN2, g_XMIdentityR3 ) case DXGI_FORMAT_R8G8_UINT: LOAD_SCANLINE2( XMUBYTE2, XMLoadUByte2, g_XMIdentityR3 ) case DXGI_FORMAT_R8G8_SNORM: LOAD_SCANLINE2( XMBYTEN2, XMLoadByteN2, g_XMIdentityR3 ) case DXGI_FORMAT_R8G8_SINT: LOAD_SCANLINE2( XMBYTE2, XMLoadByte2, g_XMIdentityR3 ) case DXGI_FORMAT_R16_FLOAT: if ( size >= sizeof(HALF) ) { const HALF * __restrict sPtr = reinterpret_cast(pSource); for( size_t icount = 0; icount < size; icount += sizeof(HALF) ) { if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSet( XMConvertHalfToFloat(*sPtr++), 0.f, 0.f, 1.f ); } return true; } return false; case DXGI_FORMAT_D16_UNORM: case DXGI_FORMAT_R16_UNORM: if ( size >= sizeof(uint16_t) ) { const uint16_t* __restrict sPtr = reinterpret_cast(pSource); for( size_t icount = 0; icount < size; icount += sizeof(uint16_t) ) { if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSet( static_cast(*sPtr++) / 65535.f, 0.f, 0.f, 1.f ); } return true; } return false; case DXGI_FORMAT_R16_UINT: if ( size >= sizeof(uint16_t) ) { const uint16_t * __restrict sPtr = reinterpret_cast(pSource); for( size_t icount = 0; icount < size; icount += sizeof(uint16_t) ) { if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSet( static_cast(*sPtr++), 0.f, 0.f, 1.f ); } return true; } return false; case DXGI_FORMAT_R16_SNORM: if ( size >= sizeof(int16_t) ) { const int16_t * __restrict sPtr = reinterpret_cast(pSource); for( size_t icount = 0; icount < size; icount += sizeof(int16_t) ) { if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSet( static_cast(*sPtr++) / 32767.f, 0.f, 0.f, 1.f ); } return true; } return false; case DXGI_FORMAT_R16_SINT: if ( size >= sizeof(int16_t) ) { const int16_t * __restrict sPtr = reinterpret_cast(pSource); for( size_t icount = 0; icount < size; icount += sizeof(int16_t) ) { if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSet( static_cast(*sPtr++), 0.f, 0.f, 1.f ); } return true; } return false; case DXGI_FORMAT_R8_UNORM: if ( size >= sizeof(uint8_t) ) { const uint8_t * __restrict sPtr = reinterpret_cast(pSource); for( size_t icount = 0; icount < size; icount += sizeof(uint8_t) ) { if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSet( static_cast(*sPtr++) / 255.f, 0.f, 0.f, 1.f ); } return true; } return false; case DXGI_FORMAT_R8_UINT: if ( size >= sizeof(uint8_t) ) { const uint8_t * __restrict sPtr = reinterpret_cast(pSource); for( size_t icount = 0; icount < size; icount += sizeof(uint8_t) ) { if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSet( static_cast(*sPtr++), 0.f, 0.f, 1.f ); } return true; } return false; case DXGI_FORMAT_R8_SNORM: if ( size >= sizeof(int8_t) ) { const int8_t * __restrict sPtr = reinterpret_cast(pSource); for( size_t icount = 0; icount < size; icount += sizeof(int8_t) ) { if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSet( static_cast(*sPtr++) / 127.f, 0.f, 0.f, 1.f ); } return true; } return false; case DXGI_FORMAT_R8_SINT: if ( size >= sizeof(int8_t) ) { const int8_t * __restrict sPtr = reinterpret_cast(pSource); for( size_t icount = 0; icount < size; icount += sizeof(int8_t) ) { if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSet( static_cast(*sPtr++), 0.f, 0.f, 1.f ); } return true; } return false; case DXGI_FORMAT_A8_UNORM: if ( size >= sizeof(uint8_t) ) { const uint8_t * __restrict sPtr = reinterpret_cast(pSource); for( size_t icount = 0; icount < size; icount += sizeof(uint8_t) ) { if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSet( 0.f, 0.f, 0.f, static_cast(*sPtr++) / 255.f ); } return true; } return false; case DXGI_FORMAT_R1_UNORM: if ( size >= sizeof(uint8_t) ) { const uint8_t * __restrict sPtr = reinterpret_cast(pSource); for( size_t icount = 0; icount < size; icount += sizeof(uint8_t) ) { for( size_t bcount = 8; bcount > 0; --bcount ) { if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSet( (((*sPtr >> (bcount-1)) & 0x1) ? 1.f : 0.f), 0.f, 0.f, 1.f ); } ++sPtr; } return true; } return false; case DXGI_FORMAT_R9G9B9E5_SHAREDEXP: if ( size >= sizeof(XMFLOAT3SE) ) { const XMFLOAT3SE * __restrict sPtr = reinterpret_cast(pSource); for( size_t icount = 0; icount < size; icount += sizeof(XMFLOAT3SE) ) { union { float f; int32_t i; } fi; fi.i = 0x33800000 + (sPtr->e << 23); float Scale = fi.f; XMVECTORF32 v = { Scale * float( sPtr->xm ), Scale * float( sPtr->ym ), Scale * float( sPtr->zm ), 1.0f }; if ( dPtr >= ePtr ) break; *(dPtr++) = v; } return true; } return false; case DXGI_FORMAT_R8G8_B8G8_UNORM: if ( size >= sizeof(XMUBYTEN4) ) { const XMUBYTEN4 * __restrict sPtr = reinterpret_cast(pSource); for( size_t icount = 0; icount < size; icount += sizeof(XMUBYTEN4) ) { XMVECTOR v = XMLoadUByteN4( sPtr++ ); XMVECTOR v1 = XMVectorSwizzle<0, 3, 2, 1>( v ); if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSelect( g_XMIdentityR3, v, g_XMSelect1110 ); if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSelect( g_XMIdentityR3, v1, g_XMSelect1110 ); } return true; } return false; case DXGI_FORMAT_G8R8_G8B8_UNORM: if ( size >= sizeof(XMUBYTEN4) ) { const XMUBYTEN4 * __restrict sPtr = reinterpret_cast(pSource); for( size_t icount = 0; icount < size; icount += sizeof(XMUBYTEN4) ) { XMVECTOR v = XMLoadUByteN4( sPtr++ ); XMVECTOR v0 = XMVectorSwizzle<1, 0, 3, 2>( v ); XMVECTOR v1 = XMVectorSwizzle<1, 2, 3, 0>( v ); if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSelect( g_XMIdentityR3, v0, g_XMSelect1110 ); if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSelect( g_XMIdentityR3, v1, g_XMSelect1110 ); } return true; } return false; case DXGI_FORMAT_B5G6R5_UNORM: if ( size >= sizeof(XMU565) ) { static XMVECTORF32 s_Scale = { 1.f/31.f, 1.f/63.f, 1.f/31.f, 1.f }; const XMU565 * __restrict sPtr = reinterpret_cast(pSource); for( size_t icount = 0; icount < size; icount += sizeof(XMU565) ) { XMVECTOR v = XMLoadU565( sPtr++ ); v = XMVectorMultiply( v, s_Scale ); v = XMVectorSwizzle<2, 1, 0, 3>( v ); if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSelect( g_XMIdentityR3, v, g_XMSelect1110 ); } return true; } return false; case DXGI_FORMAT_B5G5R5A1_UNORM: if ( size >= sizeof(XMU555) ) { static XMVECTORF32 s_Scale = { 1.f/31.f, 1.f/31.f, 1.f/31.f, 1.f }; const XMU555 * __restrict sPtr = reinterpret_cast(pSource); for( size_t icount = 0; icount < size; icount += sizeof(XMU555) ) { XMVECTOR v = XMLoadU555( sPtr++ ); v = XMVectorMultiply( v, s_Scale ); if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSwizzle<2, 1, 0, 3>( v ); } return true; } return false; case DXGI_FORMAT_B8G8R8A8_UNORM: case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: if ( size >= sizeof(XMUBYTEN4) ) { const XMUBYTEN4 * __restrict sPtr = reinterpret_cast(pSource); for( size_t icount = 0; icount < size; icount += sizeof(XMUBYTEN4) ) { XMVECTOR v = XMLoadUByteN4( sPtr++ ); if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSwizzle<2, 1, 0, 3>( v ); } return true; } return false; case DXGI_FORMAT_B8G8R8X8_UNORM: case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB: if ( size >= sizeof(XMUBYTEN4) ) { const XMUBYTEN4 * __restrict sPtr = reinterpret_cast(pSource); for( size_t icount = 0; icount < size; icount += sizeof(XMUBYTEN4) ) { XMVECTOR v = XMLoadUByteN4( sPtr++ ); v = XMVectorSwizzle<2, 1, 0, 3>( v ); if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSelect( g_XMIdentityR3, v, g_XMSelect1110 ); } return true; } return false; #ifdef DXGI_1_2_FORMATS case DXGI_FORMAT_B4G4R4A4_UNORM: if ( size >= sizeof(XMUNIBBLE4) ) { static XMVECTORF32 s_Scale = { 1.f/15.f, 1.f/15.f, 1.f/15.f, 1.f/15.f }; const XMUNIBBLE4 * __restrict sPtr = reinterpret_cast(pSource); for( size_t icount = 0; icount < size; icount += sizeof(XMUNIBBLE4) ) { XMVECTOR v = XMLoadUNibble4( sPtr++ ); v = XMVectorMultiply( v, s_Scale ); if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSwizzle<2, 1, 0, 3>( v ); } return true; } return false; // we don't support the video formats ( see IsVideo function ) #endif // DXGI_1_2_FORMATS default: return false; } } #undef LOAD_SCANLINE #undef LOAD_SCANLINE3 #undef LOAD_SCANLINE2 //------------------------------------------------------------------------------------- // Stores an image row from standard RGBA XMVECTOR (aligned) array //------------------------------------------------------------------------------------- #define STORE_SCANLINE( type, func )\ if ( size >= sizeof(type) )\ {\ type * __restrict dPtr = reinterpret_cast(pDestination);\ for( size_t icount = 0; icount < size; icount += sizeof(type) )\ {\ if ( sPtr >= ePtr ) break;\ func( dPtr++, *sPtr++ );\ }\ return true; \ }\ return false; _Use_decl_annotations_ bool _StoreScanline( LPVOID pDestination, size_t size, DXGI_FORMAT format, const XMVECTOR* pSource, size_t count, float threshold ) { assert( pDestination && size > 0 ); assert( pSource && count > 0 && (((uintptr_t)pSource & 0xF) == 0) ); assert( IsValid(format) && !IsVideo(format) && !IsTypeless(format) && !IsCompressed(format) ); const XMVECTOR* __restrict sPtr = pSource; if ( !sPtr ) return false; const XMVECTOR* ePtr = pSource + count; switch( format ) { case DXGI_FORMAT_R32G32B32A32_FLOAT: STORE_SCANLINE( XMFLOAT4, XMStoreFloat4 ) case DXGI_FORMAT_R32G32B32A32_UINT: STORE_SCANLINE( XMUINT4, XMStoreUInt4 ) case DXGI_FORMAT_R32G32B32A32_SINT: STORE_SCANLINE( XMINT4, XMStoreSInt4 ) case DXGI_FORMAT_R32G32B32_FLOAT: STORE_SCANLINE( XMFLOAT3, XMStoreFloat3 ) case DXGI_FORMAT_R32G32B32_UINT: STORE_SCANLINE( XMUINT3, XMStoreUInt3 ) case DXGI_FORMAT_R32G32B32_SINT: STORE_SCANLINE( XMINT3, XMStoreSInt3 ) case DXGI_FORMAT_R16G16B16A16_FLOAT: STORE_SCANLINE( XMHALF4, XMStoreHalf4 ) case DXGI_FORMAT_R16G16B16A16_UNORM: STORE_SCANLINE( XMUSHORTN4, XMStoreUShortN4 ) case DXGI_FORMAT_R16G16B16A16_UINT: STORE_SCANLINE( XMUSHORT4, XMStoreUShort4 ) case DXGI_FORMAT_R16G16B16A16_SNORM: STORE_SCANLINE( XMSHORTN4, XMStoreShortN4 ) case DXGI_FORMAT_R16G16B16A16_SINT: STORE_SCANLINE( XMSHORT4, XMStoreShort4 ) case DXGI_FORMAT_R32G32_FLOAT: STORE_SCANLINE( XMFLOAT2, XMStoreFloat2 ) case DXGI_FORMAT_R32G32_UINT: STORE_SCANLINE( XMUINT2, XMStoreUInt2 ) case DXGI_FORMAT_R32G32_SINT: STORE_SCANLINE( XMINT2, XMStoreSInt2 ) case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: if ( size >= (sizeof(float)+sizeof(uint32_t)) ) { float *dPtr = reinterpret_cast(pDestination); for( size_t icount = 0; icount < size; icount += (sizeof(float)+sizeof(uint32_t)) ) { if ( sPtr >= ePtr ) break; XMFLOAT4 f; XMStoreFloat4( &f, *sPtr++ ); dPtr[0] = f.x; uint8_t* ps8 = reinterpret_cast( &dPtr[1] ); ps8[0] = static_cast( std::min( 255.f, std::max( 0.f, f.y ) ) ); ps8[1] = ps8[2] = ps8[3] = 0; dPtr += 2; } return true; } return false; case DXGI_FORMAT_R10G10B10A2_UNORM: STORE_SCANLINE( XMUDECN4, XMStoreUDecN4 ); case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM: if ( size >= sizeof(XMUDECN4) ) { static const XMVECTORF32 Scale = { 510.0f, 510.0f, 510.0f, 3.0f }; static const XMVECTORF32 Bias = { 384.0f, 384.0f, 384.0f, 0.0f }; static const XMVECTORF32 C = { 1023.f, 1023.f, 1023.f, 3.f }; XMUDECN4 * __restrict dPtr = reinterpret_cast(pDestination); for( size_t icount = 0; icount < size; icount += sizeof(XMUDECN4) ) { if ( sPtr >= ePtr ) break; XMVECTOR N = XMVectorMultiplyAdd( *sPtr++, Scale, Bias ); N = XMVectorClamp( N, g_XMZero, C ); XMFLOAT4A tmp; XMStoreFloat4A(&tmp, N ); dPtr->v = ((uint32_t)tmp.w << 30) | (((uint32_t)tmp.z & 0x3FF) << 20) | (((uint32_t)tmp.y & 0x3FF) << 10) | (((uint32_t)tmp.x & 0x3FF)); ++dPtr; } return true; } return false; case DXGI_FORMAT_R10G10B10A2_UINT: STORE_SCANLINE( XMUDEC4, XMStoreUDec4 ); case DXGI_FORMAT_R11G11B10_FLOAT: STORE_SCANLINE( XMFLOAT3PK, XMStoreFloat3PK ); case DXGI_FORMAT_R8G8B8A8_UNORM: case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: STORE_SCANLINE( XMUBYTEN4, XMStoreUByteN4 ) case DXGI_FORMAT_R8G8B8A8_UINT: STORE_SCANLINE( XMUBYTE4, XMStoreUByte4 ) case DXGI_FORMAT_R8G8B8A8_SNORM: STORE_SCANLINE( XMBYTEN4, XMStoreByteN4 ) case DXGI_FORMAT_R8G8B8A8_SINT: STORE_SCANLINE( XMBYTE4, XMStoreByte4 ) case DXGI_FORMAT_R16G16_FLOAT: STORE_SCANLINE( XMHALF2, XMStoreHalf2 ) case DXGI_FORMAT_R16G16_UNORM: STORE_SCANLINE( XMUSHORTN2, XMStoreUShortN2 ) case DXGI_FORMAT_R16G16_UINT: STORE_SCANLINE( XMUSHORT2, XMStoreUShort2 ) case DXGI_FORMAT_R16G16_SNORM: STORE_SCANLINE( XMSHORTN2, XMStoreShortN2 ) case DXGI_FORMAT_R16G16_SINT: STORE_SCANLINE( XMSHORT2, XMStoreShort2 ) case DXGI_FORMAT_D32_FLOAT: case DXGI_FORMAT_R32_FLOAT: if ( size >= sizeof(float) ) { float * __restrict dPtr = reinterpret_cast(pDestination); for( size_t icount = 0; icount < size; icount += sizeof(float) ) { if ( sPtr >= ePtr ) break; XMStoreFloat( dPtr++, *(sPtr++) ); } return true; } return false; case DXGI_FORMAT_R32_UINT: if ( size >= sizeof(uint32_t) ) { uint32_t * __restrict dPtr = reinterpret_cast(pDestination); for( size_t icount = 0; icount < size; icount += sizeof(uint32_t) ) { if ( sPtr >= ePtr ) break; XMVECTOR v = XMConvertVectorFloatToUInt( *(sPtr++), 0 ); XMStoreInt( dPtr++, v ); } return true; } return false; case DXGI_FORMAT_R32_SINT: if ( size >= sizeof(uint32_t) ) { uint32_t * __restrict dPtr = reinterpret_cast(pDestination); for( size_t icount = 0; icount < size; icount += sizeof(uint32_t) ) { if ( sPtr >= ePtr ) break; XMVECTOR v = XMConvertVectorFloatToInt( *(sPtr++), 0 ); XMStoreInt( dPtr++, v ); } return true; } return false; case DXGI_FORMAT_D24_UNORM_S8_UINT: if ( size >= sizeof(uint32_t) ) { static const XMVECTORF32 clamp = { 1.f, 255.f, 0.f, 0.f }; XMVECTOR zero = XMVectorZero(); uint32_t *dPtr = reinterpret_cast(pDestination); for( size_t icount = 0; icount < size; icount += sizeof(uint32_t) ) { if ( sPtr >= ePtr ) break; XMFLOAT4 f; XMStoreFloat4( &f, XMVectorClamp( *sPtr++, zero, clamp ) ); *dPtr++ = (static_cast( f.x * 16777215.f ) & 0xFFFFFF) | ((static_cast( f.y ) & 0xFF) << 24); } return true; } return false; case DXGI_FORMAT_R8G8_UNORM: STORE_SCANLINE( XMUBYTEN2, XMStoreUByteN2 ) case DXGI_FORMAT_R8G8_UINT: STORE_SCANLINE( XMUBYTE2, XMStoreUByte2 ) case DXGI_FORMAT_R8G8_SNORM: STORE_SCANLINE( XMBYTEN2, XMStoreByteN2 ) case DXGI_FORMAT_R8G8_SINT: STORE_SCANLINE( XMBYTE2, XMStoreByte2 ) case DXGI_FORMAT_R16_FLOAT: if ( size >= sizeof(HALF) ) { HALF * __restrict dPtr = reinterpret_cast(pDestination); for( size_t icount = 0; icount < size; icount += sizeof(HALF) ) { if ( sPtr >= ePtr ) break; float v = XMVectorGetX( *sPtr++ ); *(dPtr++) = XMConvertFloatToHalf(v); } return true; } return false; case DXGI_FORMAT_D16_UNORM: case DXGI_FORMAT_R16_UNORM: if ( size >= sizeof(uint16_t) ) { uint16_t * __restrict dPtr = reinterpret_cast(pDestination); for( size_t icount = 0; icount < size; icount += sizeof(uint16_t) ) { if ( sPtr >= ePtr ) break; float v = XMVectorGetX( *sPtr++ ); v = std::max( std::min( v, 1.f ), 0.f ); *(dPtr++) = static_cast( v*65535.f + 0.5f ); } return true; } return false; case DXGI_FORMAT_R16_UINT: if ( size >= sizeof(uint16_t) ) { uint16_t * __restrict dPtr = reinterpret_cast(pDestination); for( size_t icount = 0; icount < size; icount += sizeof(uint16_t) ) { if ( sPtr >= ePtr ) break; float v = XMVectorGetX( *sPtr++ ); v = std::max( std::min( v, 65535.f ), 0.f ); *(dPtr++) = static_cast(v); } return true; } return false; case DXGI_FORMAT_R16_SNORM: if ( size >= sizeof(int16_t) ) { int16_t * __restrict dPtr = reinterpret_cast(pDestination); for( size_t icount = 0; icount < size; icount += sizeof(int16_t) ) { if ( sPtr >= ePtr ) break; float v = XMVectorGetX( *sPtr++ ); v = std::max( std::min( v, 1.f ), -1.f ); *(dPtr++) = static_cast( v * 32767.f ); } return true; } return false; case DXGI_FORMAT_R16_SINT: if ( size >= sizeof(int16_t) ) { int16_t * __restrict dPtr = reinterpret_cast(pDestination); for( size_t icount = 0; icount < size; icount += sizeof(int16_t) ) { if ( sPtr >= ePtr ) break; float v = XMVectorGetX( *sPtr++ ); v = std::max( std::min( v, 32767.f ), -32767.f ); *(dPtr++) = static_cast(v); } return true; } return false; case DXGI_FORMAT_R8_UNORM: if ( size >= sizeof(uint8_t) ) { uint8_t * __restrict dPtr = reinterpret_cast(pDestination); for( size_t icount = 0; icount < size; icount += sizeof(uint8_t) ) { if ( sPtr >= ePtr ) break; float v = XMVectorGetX( *sPtr++ ); v = std::max( std::min( v, 1.f ), 0.f ); *(dPtr++) = static_cast( v * 255.f); } return true; } return false; case DXGI_FORMAT_R8_UINT: if ( size >= sizeof(uint8_t) ) { uint8_t * __restrict dPtr = reinterpret_cast(pDestination); for( size_t icount = 0; icount < size; icount += sizeof(uint8_t) ) { if ( sPtr >= ePtr ) break; float v = XMVectorGetX( *sPtr++ ); v = std::max( std::min( v, 255.f ), 0.f ); *(dPtr++) = static_cast(v); } return true; } return false; case DXGI_FORMAT_R8_SNORM: if ( size >= sizeof(int8_t) ) { int8_t * __restrict dPtr = reinterpret_cast(pDestination); for( size_t icount = 0; icount < size; icount += sizeof(int8_t) ) { if ( sPtr >= ePtr ) break; float v = XMVectorGetX( *sPtr++ ); v = std::max( std::min( v, 1.f ), -1.f ); *(dPtr++) = static_cast( v * 127.f ); } return true; } return false; case DXGI_FORMAT_R8_SINT: if ( size >= sizeof(int8_t) ) { int8_t * __restrict dPtr = reinterpret_cast(pDestination); for( size_t icount = 0; icount < size; icount += sizeof(int8_t) ) { if ( sPtr >= ePtr ) break; float v = XMVectorGetX( *sPtr++ ); v = std::max( std::min( v, 127.f ), -127.f ); *(dPtr++) = static_cast( v ); } return true; } return false; case DXGI_FORMAT_A8_UNORM: if ( size >= sizeof(uint8_t) ) { uint8_t * __restrict dPtr = reinterpret_cast(pDestination); for( size_t icount = 0; icount < size; icount += sizeof(uint8_t) ) { if ( sPtr >= ePtr ) break; float v = XMVectorGetW( *sPtr++ ); v = std::max( std::min( v, 1.f ), 0.f ); *(dPtr++) = static_cast( v * 255.f); } return true; } return false; case DXGI_FORMAT_R1_UNORM: if ( size >= sizeof(uint8_t) ) { uint8_t * __restrict dPtr = reinterpret_cast(pDestination); for( size_t icount = 0; icount < size; icount += sizeof(uint8_t) ) { uint8_t pixels = 0; for( size_t bcount = 8; bcount > 0; --bcount ) { if ( sPtr >= ePtr ) break; float v = XMVectorGetX( *sPtr++ ); // Absolute thresholding generally doesn't give good results for all images // Picking the 'right' threshold automatically requires whole-image analysis if ( v > 0.25f ) pixels |= 1 << (bcount-1); } *(dPtr++) = pixels; } return true; } return false; case DXGI_FORMAT_R9G9B9E5_SHAREDEXP: if ( size >= sizeof(XMFLOAT3SE) ) { static const float maxf9 = float(0x1FF << 7); static const float minf9 = float(1.f / (1 << 16)); XMFLOAT3SE * __restrict dPtr = reinterpret_cast(pDestination); for( size_t icount = 0; icount < size; icount += sizeof(XMFLOAT3SE) ) { if ( sPtr >= ePtr ) break; XMFLOAT3 rgb; XMStoreFloat3( &rgb, *(sPtr++) ); float r = (rgb.x >= 0.f) ? ( (rgb.x > maxf9) ? maxf9 : rgb.x ) : 0.f; float g = (rgb.y >= 0.f) ? ( (rgb.y > maxf9) ? maxf9 : rgb.y ) : 0.f; float b = (rgb.z >= 0.f) ? ( (rgb.z > maxf9) ? maxf9 : rgb.z ) : 0.f; const float max_rg = (r > g) ? r : g; const float max_rgb = (max_rg > b) ? max_rg : b; const float maxColor = (max_rgb > minf9) ? max_rgb : minf9; union { float f; INT32 i; } fi; fi.f = maxColor; fi.i &= 0xFF800000; // cut off fraction dPtr->e = (fi.i - 0x37800000) >> 23; fi.i = 0x83000000 - fi.i; float ScaleR = fi.f; dPtr->xm = static_cast( round_to_nearest(r * ScaleR) ); dPtr->ym = static_cast( round_to_nearest(g * ScaleR) ); dPtr->zm = static_cast( round_to_nearest(b * ScaleR) ); ++dPtr; } return true; } return false; case DXGI_FORMAT_R8G8_B8G8_UNORM: if ( size >= sizeof(XMUBYTEN4) ) { XMUBYTEN4 * __restrict dPtr = reinterpret_cast(pDestination); for( size_t icount = 0; icount < size; icount += sizeof(XMUBYTEN4) ) { if ( sPtr >= ePtr ) break; XMVECTOR v0 = *sPtr++; XMVECTOR v1 = (sPtr < ePtr) ? XMVectorSplatY( *sPtr++ ) : XMVectorZero(); XMVECTOR v = XMVectorSelect( v1, v0, g_XMSelect1110 ); XMStoreUByteN4( dPtr++, v ); } return true; } return false; case DXGI_FORMAT_G8R8_G8B8_UNORM: if ( size >= sizeof(XMUBYTEN4) ) { static XMVECTORI32 select1101 = {XM_SELECT_1, XM_SELECT_1, XM_SELECT_0, XM_SELECT_1}; XMUBYTEN4 * __restrict dPtr = reinterpret_cast(pDestination); for( size_t icount = 0; icount < size; icount += sizeof(XMUBYTEN4) ) { if ( sPtr >= ePtr ) break; XMVECTOR v0 = XMVectorSwizzle<1, 0, 3, 2>( *sPtr++ ); XMVECTOR v1 = (sPtr < ePtr) ? XMVectorSplatY( *sPtr++ ) : XMVectorZero(); XMVECTOR v = XMVectorSelect( v1, v0, select1101 ); XMStoreUByteN4( dPtr++, v ); } return true; } return false; case DXGI_FORMAT_B5G6R5_UNORM: if ( size >= sizeof(XMU565) ) { static XMVECTORF32 s_Scale = { 31.f, 63.f, 31.f, 1.f }; XMU565 * __restrict dPtr = reinterpret_cast(pDestination); for( size_t icount = 0; icount < size; icount += sizeof(XMU565) ) { if ( sPtr >= ePtr ) break; XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>( *sPtr++ ); v = XMVectorMultiply( v, s_Scale ); XMStoreU565( dPtr++, v ); } return true; } return false; case DXGI_FORMAT_B5G5R5A1_UNORM: if ( size >= sizeof(XMU555) ) { static XMVECTORF32 s_Scale = { 31.f, 31.f, 31.f, 1.f }; XMU555 * __restrict dPtr = reinterpret_cast(pDestination); for( size_t icount = 0; icount < size; icount += sizeof(XMU555) ) { if ( sPtr >= ePtr ) break; XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>( *sPtr++ ); v = XMVectorMultiply( v, s_Scale ); XMStoreU555( dPtr, v ); dPtr->w = ( XMVectorGetW( v ) > threshold ) ? 1 : 0; ++dPtr; } return true; } return false; case DXGI_FORMAT_B8G8R8A8_UNORM: case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: if ( size >= sizeof(XMUBYTEN4) ) { XMUBYTEN4 * __restrict dPtr = reinterpret_cast(pDestination); for( size_t icount = 0; icount < size; icount += sizeof(XMUBYTEN4) ) { if ( sPtr >= ePtr ) break; XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>( *sPtr++ ); XMStoreUByteN4( dPtr++, v ); } return true; } return false; case DXGI_FORMAT_B8G8R8X8_UNORM: case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB: if ( size >= sizeof(XMUBYTEN4) ) { XMUBYTEN4 * __restrict dPtr = reinterpret_cast(pDestination); for( size_t icount = 0; icount < size; icount += sizeof(XMUBYTEN4) ) { if ( sPtr >= ePtr ) break; XMVECTOR v = XMVectorPermute<2, 1, 0, 7>( *sPtr++, g_XMIdentityR3 ); XMStoreUByteN4( dPtr++, v ); } return true; } return false; #ifdef DXGI_1_2_FORMATS case DXGI_FORMAT_B4G4R4A4_UNORM: if ( size >= sizeof(XMUNIBBLE4) ) { static XMVECTORF32 s_Scale = { 15.f, 15.f, 15.f, 15.f }; XMUNIBBLE4 * __restrict dPtr = reinterpret_cast(pDestination); for( size_t icount = 0; icount < size; icount += sizeof(XMUNIBBLE4) ) { if ( sPtr >= ePtr ) break; XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>( *sPtr++ ); v = XMVectorMultiply( v, s_Scale ); XMStoreUNibble4( dPtr++, v ); } return true; } return false; // We don't support the video formats ( see IsVideo function ) #endif // DXGI_1_2_FORMATS default: return false; } } #undef STORE_SCANLINE //------------------------------------------------------------------------------------- // Convert DXGI image to/from GUID_WICPixelFormat128bppRGBAFloat (no range conversions) //------------------------------------------------------------------------------------- _Use_decl_annotations_ HRESULT _ConvertToR32G32B32A32( const Image& srcImage, ScratchImage& image ) { if ( !srcImage.pixels ) return E_POINTER; HRESULT hr = image.Initialize2D( DXGI_FORMAT_R32G32B32A32_FLOAT, srcImage.width, srcImage.height, 1, 1 ); if ( FAILED(hr) ) return hr; const Image *img = image.GetImage( 0, 0, 0 ); if ( !img ) { image.Release(); return E_POINTER; } uint8_t* pDest = img->pixels; if ( !pDest ) { image.Release(); return E_POINTER; } const uint8_t *pSrc = srcImage.pixels; for( size_t h = 0; h < srcImage.height; ++h ) { if ( !_LoadScanline( reinterpret_cast(pDest), srcImage.width, pSrc, srcImage.rowPitch, srcImage.format ) ) { image.Release(); return E_FAIL; } pSrc += srcImage.rowPitch; pDest += img->rowPitch; } return S_OK; } _Use_decl_annotations_ HRESULT _ConvertFromR32G32B32A32( const Image& srcImage, const Image& destImage ) { assert( srcImage.format == DXGI_FORMAT_R32G32B32A32_FLOAT ); if ( !srcImage.pixels || !destImage.pixels ) return E_POINTER; if ( srcImage.width != destImage.width || srcImage.height != destImage.height ) return E_FAIL; const uint8_t *pSrc = srcImage.pixels; uint8_t* pDest = destImage.pixels; for( size_t h = 0; h < srcImage.height; ++h ) { if ( !_StoreScanline( pDest, destImage.rowPitch, destImage.format, reinterpret_cast(pSrc), srcImage.width ) ) return E_FAIL; pSrc += srcImage.rowPitch; pDest += destImage.rowPitch; } return S_OK; } _Use_decl_annotations_ HRESULT _ConvertFromR32G32B32A32( const Image& srcImage, DXGI_FORMAT format, ScratchImage& image ) { if ( !srcImage.pixels ) return E_POINTER; HRESULT hr = image.Initialize2D( format, srcImage.width, srcImage.height, 1, 1 ); if ( FAILED(hr) ) return hr; const Image *img = image.GetImage( 0, 0, 0 ); if ( !img ) { image.Release(); return E_POINTER; } hr = _ConvertFromR32G32B32A32( srcImage, *img ); if ( FAILED(hr) ) { image.Release(); return hr; } return S_OK; } _Use_decl_annotations_ HRESULT _ConvertFromR32G32B32A32( const Image* srcImages, size_t nimages, const TexMetadata& metadata, DXGI_FORMAT format, ScratchImage& result ) { if ( !srcImages ) return E_POINTER; result.Release(); assert( metadata.format == DXGI_FORMAT_R32G32B32A32_FLOAT ); TexMetadata mdata2 = metadata; mdata2.format = format; HRESULT hr = result.Initialize( mdata2 ); if ( FAILED(hr) ) return hr; if ( nimages != result.GetImageCount() ) { result.Release(); return E_FAIL; } const Image* dest = result.GetImages(); if ( !dest ) { result.Release(); return E_POINTER; } for( size_t index=0; index < nimages; ++index ) { const Image& src = srcImages[ index ]; const Image& dst = dest[ index ]; assert( src.format == DXGI_FORMAT_R32G32B32A32_FLOAT ); assert( dst.format == format ); if ( src.width != dst.width || src.height != dst.height ) { result.Release(); return E_FAIL; } const uint8_t* pSrc = src.pixels; uint8_t* pDest = dst.pixels; if ( !pSrc || !pDest ) { result.Release(); return E_POINTER; } for( size_t h=0; h < src.height; ++h ) { if ( !_StoreScanline( pDest, dst.rowPitch, format, reinterpret_cast(pSrc), src.width ) ) { result.Release(); return E_FAIL; } pSrc += src.rowPitch; pDest += dst.rowPitch; } } return S_OK; } //------------------------------------------------------------------------------------- // Convert from Linear RGB to sRGB // // if C_linear <= 0.0031308 -> C_srgb = 12.92 * C_linear // if C_linear > 0.0031308 -> C_srgb = ( 1 + a ) * pow( C_Linear, 1 / 2.4 ) - a // where a = 0.055 //------------------------------------------------------------------------------------- static inline XMVECTOR RGBToSRGB( FXMVECTOR rgb ) { static const XMVECTORF32 Cutoff = { 0.0031308f, 0.0031308f, 0.0031308f, 1.f }; static const XMVECTORF32 Linear = { 12.92f, 12.92f, 12.92f, 1.f }; static const XMVECTORF32 Scale = { 1.055f, 1.055f, 1.055f, 1.f }; static const XMVECTORF32 Bias = { 0.055f, 0.055f, 0.055f, 0.f }; static const XMVECTORF32 InvGamma = { 1.0f/2.4f, 1.0f/2.4f, 1.0f/2.4f, 1.f }; XMVECTOR V = XMVectorSaturate(rgb); XMVECTOR V0 = XMVectorMultiply( V, Linear ); XMVECTOR V1 = Scale * XMVectorPow( V, InvGamma ) - Bias; XMVECTOR select = XMVectorLess( V, Cutoff ); V = XMVectorSelect( V1, V0, select ); return XMVectorSelect( rgb, V, g_XMSelect1110 ); } _Use_decl_annotations_ bool _StoreScanlineLinear( LPVOID pDestination, size_t size, DXGI_FORMAT format, XMVECTOR* pSource, size_t count, DWORD flags ) { assert( pDestination && size > 0 ); assert( pSource && count > 0 && (((uintptr_t)pSource & 0xF) == 0) ); assert( IsValid(format) && !IsVideo(format) && !IsTypeless(format) && !IsCompressed(format) ); switch ( format ) { case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB: flags |= TEX_FILTER_SRGB; break; case DXGI_FORMAT_R32G32B32A32_FLOAT: case DXGI_FORMAT_R32G32B32_FLOAT: case DXGI_FORMAT_R16G16B16A16_FLOAT: case DXGI_FORMAT_R16G16B16A16_UNORM: case DXGI_FORMAT_R32G32_FLOAT: case DXGI_FORMAT_R10G10B10A2_UNORM: case DXGI_FORMAT_R11G11B10_FLOAT: case DXGI_FORMAT_R8G8B8A8_UNORM: case DXGI_FORMAT_R16G16_FLOAT: case DXGI_FORMAT_R16G16_UNORM: case DXGI_FORMAT_R32_FLOAT: case DXGI_FORMAT_R8G8_UNORM: case DXGI_FORMAT_R16_FLOAT: case DXGI_FORMAT_R16_UNORM: case DXGI_FORMAT_R8_UNORM: case DXGI_FORMAT_R9G9B9E5_SHAREDEXP: case DXGI_FORMAT_R8G8_B8G8_UNORM: case DXGI_FORMAT_G8R8_G8B8_UNORM: case DXGI_FORMAT_B5G6R5_UNORM: case DXGI_FORMAT_B5G5R5A1_UNORM: case DXGI_FORMAT_B8G8R8A8_UNORM: case DXGI_FORMAT_B8G8R8X8_UNORM: #ifdef DXGI_1_2_FORMATS case DXGI_FORMAT_B4G4R4A4_UNORM: #endif break; default: // can't treat A8, XR, Depth, SNORM, UINT, or SINT as sRGB flags &= ~TEX_FILTER_SRGB; break; } // sRGB output processing (Linear RGB -> sRGB) if ( flags & TEX_FILTER_SRGB_OUT ) { // To avoid the need for another temporary scanline buffer, we allow this function to overwrite the source buffer in-place // Given the intended usage in the filtering routines, this is not a problem. XMVECTOR* ptr = pSource; for( size_t i=0; i < count; ++i, ++ptr ) { *ptr = RGBToSRGB( *ptr ); } } return _StoreScanline( pDestination, size, format, pSource, count ); } //------------------------------------------------------------------------------------- // Convert from sRGB to Linear RGB // // if C_srgb <= 0.04045 -> C_linear = C_srgb / 12.92 // if C_srgb > 0.04045 -> C_linear = pow( ( C_srgb + a ) / ( 1 + a ), 2.4 ) // where a = 0.055 //------------------------------------------------------------------------------------- static inline XMVECTOR SRGBToRGB( FXMVECTOR srgb ) { static const XMVECTORF32 Cutoff = { 0.04045f, 0.04045f, 0.04045f, 1.f }; static const XMVECTORF32 ILinear = { 1.f/12.92f, 1.f/12.92f, 1.f/12.92f, 1.f }; static const XMVECTORF32 Scale = { 1.f/1.055f, 1.f/1.055f, 1.f/1.055f, 1.f }; static const XMVECTORF32 Bias = { 0.055f, 0.055f, 0.055f, 0.f }; static const XMVECTORF32 Gamma = { 2.4f, 2.4f, 2.4f, 1.f }; XMVECTOR V = XMVectorSaturate(srgb); XMVECTOR V0 = XMVectorMultiply( V, ILinear ); XMVECTOR V1 = XMVectorPow( (V + Bias) * Scale, Gamma ); XMVECTOR select = XMVectorGreater( V, Cutoff ); V = XMVectorSelect( V0, V1, select ); return XMVectorSelect( srgb, V, g_XMSelect1110 ); } _Use_decl_annotations_ bool _LoadScanlineLinear( XMVECTOR* pDestination, size_t count, LPCVOID pSource, size_t size, DXGI_FORMAT format, DWORD flags ) { assert( pDestination && count > 0 && (((uintptr_t)pDestination & 0xF) == 0) ); assert( pSource && size > 0 ); assert( IsValid(format) && !IsVideo(format) && !IsTypeless(format,false) && !IsCompressed(format) ); switch ( format ) { case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB: flags |= TEX_FILTER_SRGB; break; case DXGI_FORMAT_R32G32B32A32_FLOAT: case DXGI_FORMAT_R32G32B32_FLOAT: case DXGI_FORMAT_R16G16B16A16_FLOAT: case DXGI_FORMAT_R16G16B16A16_UNORM: case DXGI_FORMAT_R32G32_FLOAT: case DXGI_FORMAT_R10G10B10A2_UNORM: case DXGI_FORMAT_R11G11B10_FLOAT: case DXGI_FORMAT_R8G8B8A8_UNORM: case DXGI_FORMAT_R16G16_FLOAT: case DXGI_FORMAT_R16G16_UNORM: case DXGI_FORMAT_R32_FLOAT: case DXGI_FORMAT_R8G8_UNORM: case DXGI_FORMAT_R16_FLOAT: case DXGI_FORMAT_R16_UNORM: case DXGI_FORMAT_R8_UNORM: case DXGI_FORMAT_R9G9B9E5_SHAREDEXP: case DXGI_FORMAT_R8G8_B8G8_UNORM: case DXGI_FORMAT_G8R8_G8B8_UNORM: case DXGI_FORMAT_B5G6R5_UNORM: case DXGI_FORMAT_B5G5R5A1_UNORM: case DXGI_FORMAT_B8G8R8A8_UNORM: case DXGI_FORMAT_B8G8R8X8_UNORM: #ifdef DXGI_1_2_FORMATS case DXGI_FORMAT_B4G4R4A4_UNORM: #endif break; default: // can't treat A8, XR, Depth, SNORM, UINT, or SINT as sRGB flags &= ~TEX_FILTER_SRGB; break; } if ( _LoadScanline( pDestination, count, pSource, size, format ) ) { // sRGB input processing (sRGB -> Linear RGB) if ( flags & TEX_FILTER_SRGB_IN ) { XMVECTOR* ptr = pDestination; for( size_t i=0; i < count; ++i, ++ptr ) { *ptr = SRGBToRGB( *ptr ); } } return true; } return false; } //------------------------------------------------------------------------------------- // Convert scanline based on source/target formats //------------------------------------------------------------------------------------- struct ConvertData { DXGI_FORMAT format; size_t datasize; DWORD flags; }; static const ConvertData g_ConvertTable[] = { { DXGI_FORMAT_R32G32B32A32_FLOAT, 32, CONVF_FLOAT | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_R32G32B32A32_UINT, 32, CONVF_UINT | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_R32G32B32A32_SINT, 32, CONVF_SINT | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_R32G32B32_FLOAT, 32, CONVF_FLOAT | CONVF_R | CONVF_G | CONVF_B }, { DXGI_FORMAT_R32G32B32_UINT, 32, CONVF_UINT | CONVF_R | CONVF_G | CONVF_B }, { DXGI_FORMAT_R32G32B32_SINT, 32, CONVF_SINT | CONVF_R | CONVF_G | CONVF_B }, { DXGI_FORMAT_R16G16B16A16_FLOAT, 16, CONVF_FLOAT | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_R16G16B16A16_UNORM, 16, CONVF_UNORM | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_R16G16B16A16_UINT, 16, CONVF_UINT | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_R16G16B16A16_SNORM, 16, CONVF_SNORM | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_R16G16B16A16_SINT, 16, CONVF_SINT | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_R32G32_FLOAT, 32, CONVF_FLOAT | CONVF_R | CONVF_G }, { DXGI_FORMAT_R32G32_UINT, 32, CONVF_UINT | CONVF_R | CONVF_G }, { DXGI_FORMAT_R32G32_SINT, 32, CONVF_SINT | CONVF_R | CONVF_G }, { DXGI_FORMAT_D32_FLOAT_S8X24_UINT, 32, CONVF_FLOAT | CONVF_DEPTH | CONVF_STENCIL }, { DXGI_FORMAT_R10G10B10A2_UNORM, 10, CONVF_UNORM | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_R10G10B10A2_UINT, 10, CONVF_UINT | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_R11G11B10_FLOAT, 10, CONVF_FLOAT | CONVF_R | CONVF_G | CONVF_B }, { DXGI_FORMAT_R8G8B8A8_UNORM, 8, CONVF_UNORM | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, 8, CONVF_UNORM | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_R8G8B8A8_UINT, 8, CONVF_UINT | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_R8G8B8A8_SNORM, 8, CONVF_SNORM | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_R8G8B8A8_SINT, 8, CONVF_SINT | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_R16G16_FLOAT, 16, CONVF_FLOAT | CONVF_R | CONVF_G }, { DXGI_FORMAT_R16G16_UNORM, 16, CONVF_UNORM | CONVF_R | CONVF_G }, { DXGI_FORMAT_R16G16_UINT, 16, CONVF_UINT | CONVF_R | CONVF_G }, { DXGI_FORMAT_R16G16_SNORM, 16, CONVF_SNORM | CONVF_R | CONVF_G }, { DXGI_FORMAT_R16G16_SINT, 16, CONVF_SINT | CONVF_R | CONVF_G }, { DXGI_FORMAT_D32_FLOAT, 32, CONVF_FLOAT | CONVF_DEPTH }, { DXGI_FORMAT_R32_FLOAT, 32, CONVF_FLOAT | CONVF_R }, { DXGI_FORMAT_R32_UINT, 32, CONVF_UINT | CONVF_R }, { DXGI_FORMAT_R32_SINT, 32, CONVF_SINT | CONVF_R }, { DXGI_FORMAT_D24_UNORM_S8_UINT, 32, CONVF_UNORM | CONVF_DEPTH | CONVF_STENCIL }, { DXGI_FORMAT_R8G8_UNORM, 8, CONVF_UNORM | CONVF_R | CONVF_G }, { DXGI_FORMAT_R8G8_UINT, 8, CONVF_UINT | CONVF_R | CONVF_G }, { DXGI_FORMAT_R8G8_SNORM, 8, CONVF_SNORM | CONVF_R | CONVF_G }, { DXGI_FORMAT_R8G8_SINT, 8, CONVF_SINT | CONVF_R | CONVF_G }, { DXGI_FORMAT_R16_FLOAT, 16, CONVF_FLOAT | CONVF_R }, { DXGI_FORMAT_D16_UNORM, 16, CONVF_UNORM | CONVF_DEPTH }, { DXGI_FORMAT_R16_UNORM, 16, CONVF_UNORM | CONVF_R }, { DXGI_FORMAT_R16_UINT, 16, CONVF_UINT | CONVF_R }, { DXGI_FORMAT_R16_SNORM, 16, CONVF_SNORM | CONVF_R }, { DXGI_FORMAT_R16_SINT, 16, CONVF_SINT | CONVF_R }, { DXGI_FORMAT_R8_UNORM, 8, CONVF_UNORM | CONVF_R }, { DXGI_FORMAT_R8_UINT, 8, CONVF_UINT | CONVF_R }, { DXGI_FORMAT_R8_SNORM, 8, CONVF_SNORM | CONVF_R }, { DXGI_FORMAT_R8_SINT, 8, CONVF_SINT | CONVF_R }, { DXGI_FORMAT_A8_UNORM, 8, CONVF_UNORM | CONVF_A }, { DXGI_FORMAT_R1_UNORM, 1, CONVF_UNORM | CONVF_R }, { DXGI_FORMAT_R9G9B9E5_SHAREDEXP, 9, CONVF_SHAREDEXP | CONVF_R | CONVF_G | CONVF_B }, { DXGI_FORMAT_R8G8_B8G8_UNORM, 8, CONVF_UNORM | CONVF_PACKED | CONVF_R | CONVF_G | CONVF_B }, { DXGI_FORMAT_G8R8_G8B8_UNORM, 8, CONVF_UNORM | CONVF_PACKED | CONVF_R | CONVF_G | CONVF_B }, { DXGI_FORMAT_BC1_UNORM, 8, CONVF_UNORM | CONVF_BC | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_BC1_UNORM_SRGB, 8, CONVF_UNORM | CONVF_BC | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_BC2_UNORM, 8, CONVF_UNORM | CONVF_BC | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_BC2_UNORM_SRGB, 8, CONVF_UNORM | CONVF_BC | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_BC3_UNORM, 8, CONVF_UNORM | CONVF_BC | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_BC3_UNORM_SRGB, 8, CONVF_UNORM | CONVF_BC | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_BC4_UNORM, 8, CONVF_UNORM | CONVF_BC | CONVF_R }, { DXGI_FORMAT_BC4_SNORM, 8, CONVF_SNORM | CONVF_BC | CONVF_R }, { DXGI_FORMAT_BC5_UNORM, 8, CONVF_UNORM | CONVF_BC | CONVF_R | CONVF_G }, { DXGI_FORMAT_BC5_SNORM, 8, CONVF_SNORM | CONVF_BC | CONVF_R | CONVF_G }, { DXGI_FORMAT_B5G6R5_UNORM, 5, CONVF_UNORM | CONVF_R | CONVF_G | CONVF_B }, { DXGI_FORMAT_B5G5R5A1_UNORM, 5, CONVF_UNORM | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_B8G8R8A8_UNORM, 8, CONVF_UNORM | CONVF_BGR | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_B8G8R8X8_UNORM, 8, CONVF_UNORM | CONVF_BGR | CONVF_R | CONVF_G | CONVF_B }, { DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM, 10, CONVF_UNORM | CONVF_XR | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_B8G8R8A8_UNORM_SRGB, 8, CONVF_UNORM | CONVF_BGR | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_B8G8R8X8_UNORM_SRGB, 8, CONVF_UNORM | CONVF_BGR | CONVF_R | CONVF_G | CONVF_B }, { DXGI_FORMAT_BC6H_UF16, 16, CONVF_FLOAT | CONVF_BC | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_BC6H_SF16, 16, CONVF_FLOAT | CONVF_BC | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_BC7_UNORM, 8, CONVF_UNORM | CONVF_BC | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_BC7_UNORM_SRGB, 8, CONVF_UNORM | CONVF_BC | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, #ifdef DXGI_1_2_FORMATS { DXGI_FORMAT_B4G4R4A4_UNORM, 4, CONVF_UNORM | CONVF_BGR | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, #endif }; #pragma prefast( suppress : 25004, "Signature must match bsearch_s" ); static int __cdecl _ConvertCompare( void *context, const void* ptr1, const void *ptr2 ) { UNREFERENCED_PARAMETER(context); const ConvertData *p1 = reinterpret_cast(ptr1); const ConvertData *p2 = reinterpret_cast(ptr2); if ( p1->format == p2->format ) return 0; else return (p1->format < p2->format ) ? -1 : 1; } _Use_decl_annotations_ DWORD _GetConvertFlags( DXGI_FORMAT format ) { #ifdef _DEBUG // Ensure conversion table is in ascending order assert( _countof(g_ConvertTable) > 0 ); DXGI_FORMAT lastvalue = g_ConvertTable[0].format; for( size_t index=1; index < _countof(g_ConvertTable); ++index ) { assert( g_ConvertTable[index].format > lastvalue ); lastvalue = g_ConvertTable[index].format; } #endif ConvertData key = { format, 0 }; const ConvertData* in = (const ConvertData*) bsearch_s( &key, g_ConvertTable, _countof(g_ConvertTable), sizeof(ConvertData), _ConvertCompare, 0 ); return (in) ? in->flags : 0; } _Use_decl_annotations_ void _ConvertScanline( XMVECTOR* pBuffer, size_t count, DXGI_FORMAT outFormat, DXGI_FORMAT inFormat, DWORD flags ) { assert( pBuffer && count > 0 && (((uintptr_t)pBuffer & 0xF) == 0) ); assert( IsValid(outFormat) && !IsVideo(outFormat) && !IsTypeless(outFormat) ); assert( IsValid(inFormat) && !IsVideo(inFormat) && !IsTypeless(inFormat) ); if ( !pBuffer ) return; #ifdef _DEBUG // Ensure conversion table is in ascending order assert( _countof(g_ConvertTable) > 0 ); DXGI_FORMAT lastvalue = g_ConvertTable[0].format; for( size_t index=1; index < _countof(g_ConvertTable); ++index ) { assert( g_ConvertTable[index].format > lastvalue ); lastvalue = g_ConvertTable[index].format; } #endif // Determine conversion details about source and dest formats ConvertData key = { inFormat, 0 }; const ConvertData* in = (const ConvertData*) bsearch_s( &key, g_ConvertTable, _countof(g_ConvertTable), sizeof(ConvertData), _ConvertCompare, 0 ); key.format = outFormat; const ConvertData* out = (const ConvertData*) bsearch_s( &key, g_ConvertTable, _countof(g_ConvertTable), sizeof(ConvertData), _ConvertCompare, 0 ); if ( !in || !out ) { assert(false); return; } assert( _GetConvertFlags( inFormat ) == in->flags ); assert( _GetConvertFlags( outFormat ) == out->flags ); // Handle SRGB filtering modes switch ( inFormat ) { case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: case DXGI_FORMAT_BC1_UNORM_SRGB: case DXGI_FORMAT_BC2_UNORM_SRGB: case DXGI_FORMAT_BC3_UNORM_SRGB: case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB: case DXGI_FORMAT_BC7_UNORM_SRGB: flags |= TEX_FILTER_SRGB_IN; break; case DXGI_FORMAT_A8_UNORM: case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM: flags &= ~TEX_FILTER_SRGB_IN; break; } switch ( outFormat ) { case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: case DXGI_FORMAT_BC1_UNORM_SRGB: case DXGI_FORMAT_BC2_UNORM_SRGB: case DXGI_FORMAT_BC3_UNORM_SRGB: case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB: case DXGI_FORMAT_BC7_UNORM_SRGB: flags |= TEX_FILTER_SRGB_OUT; break; case DXGI_FORMAT_A8_UNORM: case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM: flags &= ~TEX_FILTER_SRGB_OUT; break; } if ( (flags & (TEX_FILTER_SRGB_IN|TEX_FILTER_SRGB_OUT)) == (TEX_FILTER_SRGB_IN|TEX_FILTER_SRGB_OUT) ) { flags &= ~(TEX_FILTER_SRGB_IN|TEX_FILTER_SRGB_OUT); } // sRGB input processing (sRGB -> Linear RGB) if ( flags & TEX_FILTER_SRGB_IN ) { if ( !(in->flags & CONVF_DEPTH) && ( (in->flags & CONVF_FLOAT) || (in->flags & CONVF_UNORM) ) ) { XMVECTOR* ptr = pBuffer; for( size_t i=0; i < count; ++i, ++ptr ) { *ptr = SRGBToRGB( *ptr ); } } } // Handle conversion special cases DWORD diffFlags = in->flags ^ out->flags; if ( diffFlags != 0) { if ( out->flags & CONVF_UNORM ) { if ( in->flags & CONVF_SNORM ) { // SNORM -> UNORM XMVECTOR* ptr = pBuffer; for( size_t i=0; i < count; ++i ) { XMVECTOR v = *ptr; *ptr++ = XMVectorMultiplyAdd( v, g_XMOneHalf, g_XMOneHalf ); } } else if ( in->flags & CONVF_FLOAT ) { // FLOAT -> UNORM XMVECTOR* ptr = pBuffer; for( size_t i=0; i < count; ++i ) { XMVECTOR v = *ptr; *ptr++ = XMVectorSaturate( v ); } } } else if ( out->flags & CONVF_SNORM ) { if ( in->flags & CONVF_UNORM ) { // UNORM -> SNORM static XMVECTORF32 two = { 2.0f, 2.0f, 2.0f, 2.0f }; XMVECTOR* ptr = pBuffer; for( size_t i=0; i < count; ++i ) { XMVECTOR v = *ptr; *ptr++ = XMVectorMultiplyAdd( v, two, g_XMNegativeOne ); } } else if ( in->flags & CONVF_FLOAT ) { // FLOAT -> SNORM XMVECTOR* ptr = pBuffer; for( size_t i=0; i < count; ++i ) { XMVECTOR v = *ptr; *ptr++ = XMVectorClamp( v, g_XMNegativeOne, g_XMOne ); } } } // !CONVF_A -> CONVF_A is handled because LoadScanline ensures alpha defaults to 1.0 for no-alpha formats // CONVF_PACKED cases are handled because LoadScanline/StoreScanline handles packing/unpacking if ( ((out->flags & CONVF_RGBA_MASK) == CONVF_A) && !(in->flags & CONVF_A) ) { // !CONVF_A -> A format XMVECTOR* ptr = pBuffer; for( size_t i=0; i < count; ++i ) { XMVECTOR v = *ptr; *ptr++ = XMVectorSplatX( v ); } } else if ( ((in->flags & CONVF_RGBA_MASK) == CONVF_A) && !(out->flags & CONVF_A) ) { // A format -> !CONVF_A XMVECTOR* ptr = pBuffer; for( size_t i=0; i < count; ++i ) { XMVECTOR v = *ptr; *ptr++ = XMVectorSplatW( v ); } } else if ( (in->flags & CONVF_RGB_MASK) == CONVF_R ) { if ( (out->flags & CONVF_RGB_MASK) == (CONVF_R|CONVF_G|CONVF_B) ) { // R format -> RGB format XMVECTOR* ptr = pBuffer; for( size_t i=0; i < count; ++i ) { XMVECTOR v = *ptr; XMVECTOR v1 = XMVectorSplatX( v ); *ptr++ = XMVectorSelect( v, v1, g_XMSelect1110 ); } } else if ( (out->flags & CONVF_RGB_MASK) == (CONVF_R|CONVF_G) ) { // R format -> RG format XMVECTOR* ptr = pBuffer; for( size_t i=0; i < count; ++i ) { XMVECTOR v = *ptr; XMVECTOR v1 = XMVectorSplatX( v ); *ptr++ = XMVectorSelect( v, v1, g_XMSelect1100 ); } } } else if ( (in->flags & CONVF_RGB_MASK) == (CONVF_R|CONVF_G|CONVF_B) ) { if ( (out->flags & CONVF_RGB_MASK) == CONVF_R ) { // RGB format -> R format switch( flags & ( TEX_FILTER_RGB_COPY_RED | TEX_FILTER_RGB_COPY_GREEN | TEX_FILTER_RGB_COPY_BLUE ) ) { case TEX_FILTER_RGB_COPY_RED: // Leave data unchanged and the store will handle this... break; case TEX_FILTER_RGB_COPY_GREEN: { XMVECTOR* ptr = pBuffer; for( size_t i=0; i < count; ++i ) { XMVECTOR v = *ptr; XMVECTOR v1 = XMVectorSplatY( v ); *ptr++ = XMVectorSelect( v, v1, g_XMSelect1110 ); } } break; case TEX_FILTER_RGB_COPY_BLUE: { XMVECTOR* ptr = pBuffer; for( size_t i=0; i < count; ++i ) { XMVECTOR v = *ptr; XMVECTOR v1 = XMVectorSplatZ( v ); *ptr++ = XMVectorSelect( v, v1, g_XMSelect1110 ); } } break; default: { XMVECTOR* ptr = pBuffer; for( size_t i=0; i < count; ++i ) { XMVECTOR v = *ptr; XMVECTOR v1 = XMVector3Dot( v, g_Grayscale ); *ptr++ = XMVectorSelect( v, v1, g_XMSelect1110 ); } } break; } } else if ( (out->flags & CONVF_RGB_MASK) == (CONVF_R|CONVF_G) ) { // RGB format -> RG format switch( flags & ( TEX_FILTER_RGB_COPY_RED | TEX_FILTER_RGB_COPY_GREEN | TEX_FILTER_RGB_COPY_BLUE ) ) { case TEX_FILTER_RGB_COPY_RED | TEX_FILTER_RGB_COPY_BLUE: { XMVECTOR* ptr = pBuffer; for( size_t i=0; i < count; ++i ) { XMVECTOR v = *ptr; XMVECTOR v1 = XMVectorSwizzle<0,2,0,2>( v ); *ptr++ = XMVectorSelect( v, v1, g_XMSelect1100 ); } } break; case TEX_FILTER_RGB_COPY_GREEN | TEX_FILTER_RGB_COPY_BLUE: { XMVECTOR* ptr = pBuffer; for( size_t i=0; i < count; ++i ) { XMVECTOR v = *ptr; XMVECTOR v1 = XMVectorSwizzle<1,2,3,0>( v ); *ptr++ = XMVectorSelect( v, v1, g_XMSelect1100 ); } } break; case TEX_FILTER_RGB_COPY_RED | TEX_FILTER_RGB_COPY_GREEN: default: // Leave data unchanged and the store will handle this... break; } } } } // sRGB output processing (Linear RGB -> sRGB) if ( flags & TEX_FILTER_SRGB_OUT ) { if ( !(out->flags & CONVF_DEPTH) && ( (out->flags & CONVF_FLOAT) || (out->flags & CONVF_UNORM) ) ) { XMVECTOR* ptr = pBuffer; for( size_t i=0; i < count; ++i, ++ptr ) { *ptr = RGBToSRGB( *ptr ); } } } } //------------------------------------------------------------------------------------- // Dithering //------------------------------------------------------------------------------------- // 4X4X4 ordered dithering matrix static const float g_Dither[] = { // (z & 3) + ( (y & 3) * 8) + (x & 3) 0.468750f, -0.031250f, 0.343750f, -0.156250f, 0.468750f, -0.031250f, 0.343750f, -0.156250f, -0.281250f, 0.218750f, -0.406250f, 0.093750f, -0.281250f, 0.218750f, -0.406250f, 0.093750f, 0.281250f, -0.218750f, 0.406250f, -0.093750f, 0.281250f, -0.218750f, 0.406250f, -0.093750f, -0.468750f, 0.031250f, -0.343750f, 0.156250f, -0.468750f, 0.031250f, -0.343750f, 0.156250f, }; static const XMVECTORF32 g_Scale16pc = { 65535.f, 65535.f, 65535.f, 65535.f }; static const XMVECTORF32 g_Scale15pc = { 32767.f, 32767.f, 32767.f, 32767.f }; static const XMVECTORF32 g_Scale10pc = { 1023.f, 1023.f, 1023.f, 3.f }; static const XMVECTORF32 g_Scale8pc = { 255.f, 255.f, 255.f, 255.f }; static const XMVECTORF32 g_Scale7pc = { 127.f, 127.f, 127.f, 127.f }; static const XMVECTORF32 g_Scale565pc = { 31.f, 63.f, 31.f, 1.f }; static const XMVECTORF32 g_Scale5551pc = { 31.f, 31.f, 31.f, 1.f }; static const XMVECTORF32 g_Scale4pc = { 15.f, 15.f, 15.f, 15.f }; static const XMVECTORF32 g_ErrorWeight3 = { 3.f/16.f, 3.f/16.f, 3.f/16.f, 3.f/16.f }; static const XMVECTORF32 g_ErrorWeight5 = { 5.f/16.f, 5.f/16.f, 5.f/16.f, 5.f/16.f }; static const XMVECTORF32 g_ErrorWeight1 = { 1.f/16.f, 1.f/16.f, 1.f/16.f, 1.f/16.f }; static const XMVECTORF32 g_ErrorWeight7 = { 7.f/16.f, 7.f/16.f, 7.f/16.f, 7.f/16.f }; #define STORE_SCANLINE( type, scalev, clampzero, norm, itype, mask, row, bgr ) \ if ( size >= sizeof(type) ) \ { \ type * __restrict dest = reinterpret_cast(pDestination); \ for( size_t i = 0; i < count; ++i ) \ { \ ptrdiff_t index = static_cast( ( row & 1 ) ? ( count - i - 1 ) : i ); \ ptrdiff_t delta = ( row & 1 ) ? -2 : 0; \ \ XMVECTOR v = sPtr[ index ]; \ if ( bgr ) { v = XMVectorSwizzle<2, 1, 0, 3>( v ); } \ if ( norm && clampzero ) v = XMVectorSaturate( v ) ; \ else if ( clampzero ) v = XMVectorClamp( v, g_XMZero, scalev ); \ else if ( norm ) v = XMVectorClamp( v, g_XMNegativeOne, g_XMOne ); \ else v = XMVectorClamp( v, -scalev + g_XMOne, scalev ); \ v = XMVectorAdd( v, vError ); \ if ( norm ) v = XMVectorMultiply( v, scalev ); \ \ XMVECTOR target; \ if ( pDiffusionErrors ) \ { \ target = XMVectorRound( v ); \ vError = XMVectorSubtract( v, target ); \ if (norm) vError = XMVectorDivide( vError, scalev ); \ \ /* Distribute error to next scanline and next pixel */ \ pDiffusionErrors[ index-delta ] += XMVectorMultiply( g_ErrorWeight3, vError ); \ pDiffusionErrors[ index+1 ] += XMVectorMultiply( g_ErrorWeight5, vError ); \ pDiffusionErrors[ index+2+delta ] += XMVectorMultiply( g_ErrorWeight1, vError ); \ vError = XMVectorMultiply( vError, g_ErrorWeight7 ); \ } \ else \ { \ /* Applied ordered dither */ \ target = XMVectorAdd( v, ordered[ index & 3 ] ); \ target = XMVectorRound( v ); \ } \ \ target = XMVectorMin( scalev, target ); \ target = XMVectorMax( (clampzero) ? g_XMZero : ( -scalev + g_XMOne ), target ); \ \ XMFLOAT4A tmp; \ XMStoreFloat4A( &tmp, target ); \ \ auto dPtr = &dest[ index ]; \ dPtr->x = static_cast( tmp.x ) & mask; \ dPtr->y = static_cast( tmp.y ) & mask; \ dPtr->z = static_cast( tmp.z ) & mask; \ dPtr->w = static_cast( tmp.w ) & mask; \ } \ return true; \ } \ return false; #define STORE_SCANLINE2( type, scalev, clampzero, norm, itype, mask, row ) \ /* The 2 component cases are always bgr=false */ \ if ( size >= sizeof(type) ) \ { \ type * __restrict dest = reinterpret_cast(pDestination); \ for( size_t i = 0; i < count; ++i ) \ { \ ptrdiff_t index = static_cast( ( row & 1 ) ? ( count - i - 1 ) : i ); \ ptrdiff_t delta = ( row & 1 ) ? -2 : 0; \ \ XMVECTOR v = sPtr[ index ]; \ if ( norm && clampzero ) v = XMVectorSaturate( v ) ; \ else if ( clampzero ) v = XMVectorClamp( v, g_XMZero, scalev ); \ else if ( norm ) v = XMVectorClamp( v, g_XMNegativeOne, g_XMOne ); \ else v = XMVectorClamp( v, -scalev + g_XMOne, scalev ); \ v = XMVectorAdd( v, vError ); \ if ( norm ) v = XMVectorMultiply( v, scalev ); \ \ XMVECTOR target; \ if ( pDiffusionErrors ) \ { \ target = XMVectorRound( v ); \ vError = XMVectorSubtract( v, target ); \ if (norm) vError = XMVectorDivide( vError, scalev ); \ \ /* Distribute error to next scanline and next pixel */ \ pDiffusionErrors[ index-delta ] += XMVectorMultiply( g_ErrorWeight3, vError ); \ pDiffusionErrors[ index+1 ] += XMVectorMultiply( g_ErrorWeight5, vError ); \ pDiffusionErrors[ index+2+delta ] += XMVectorMultiply( g_ErrorWeight1, vError ); \ vError = XMVectorMultiply( vError, g_ErrorWeight7 ); \ } \ else \ { \ /* Applied ordered dither */ \ target = XMVectorAdd( v, ordered[ index & 3 ] ); \ target = XMVectorRound( v ); \ } \ \ target = XMVectorMin( scalev, target ); \ target = XMVectorMax( (clampzero) ? g_XMZero : ( -scalev + g_XMOne ), target ); \ \ XMFLOAT4A tmp; \ XMStoreFloat4A( &tmp, target ); \ \ auto dPtr = &dest[ index ]; \ dPtr->x = static_cast( tmp.x ) & mask; \ dPtr->y = static_cast( tmp.y ) & mask; \ } \ return true; \ } \ return false; #define STORE_SCANLINE1( type, scalev, clampzero, norm, mask, row, selectw ) \ /* The 1 component cases are always bgr=false */ \ if ( size >= sizeof(type) ) \ { \ type * __restrict dest = reinterpret_cast(pDestination); \ for( size_t i = 0; i < count; ++i ) \ { \ ptrdiff_t index = static_cast( ( row & 1 ) ? ( count - i - 1 ) : i ); \ ptrdiff_t delta = ( row & 1 ) ? -2 : 0; \ \ XMVECTOR v = sPtr[ index ]; \ if ( norm && clampzero ) v = XMVectorSaturate( v ) ; \ else if ( clampzero ) v = XMVectorClamp( v, g_XMZero, scalev ); \ else if ( norm ) v = XMVectorClamp( v, g_XMNegativeOne, g_XMOne ); \ else v = XMVectorClamp( v, -scalev + g_XMOne, scalev ); \ v = XMVectorAdd( v, vError ); \ if ( norm ) v = XMVectorMultiply( v, scalev ); \ \ XMVECTOR target; \ if ( pDiffusionErrors ) \ { \ target = XMVectorRound( v ); \ vError = XMVectorSubtract( v, target ); \ if (norm) vError = XMVectorDivide( vError, scalev ); \ \ /* Distribute error to next scanline and next pixel */ \ pDiffusionErrors[ index-delta ] += XMVectorMultiply( g_ErrorWeight3, vError ); \ pDiffusionErrors[ index+1 ] += XMVectorMultiply( g_ErrorWeight5, vError ); \ pDiffusionErrors[ index+2+delta ] += XMVectorMultiply( g_ErrorWeight1, vError ); \ vError = XMVectorMultiply( vError, g_ErrorWeight7 ); \ } \ else \ { \ /* Applied ordered dither */ \ target = XMVectorAdd( v, ordered[ index & 3 ] ); \ target = XMVectorRound( v ); \ } \ \ target = XMVectorMin( scalev, target ); \ target = XMVectorMax( (clampzero) ? g_XMZero : ( -scalev + g_XMOne ), target ); \ \ dest[ index ] = static_cast( (selectw) ? XMVectorGetW( target ) : XMVectorGetX( target ) ) & mask; \ } \ return true; \ } \ return false; #pragma warning(push) #pragma warning( disable : 4127 ) _Use_decl_annotations_ bool _StoreScanlineDither( LPVOID pDestination, size_t size, DXGI_FORMAT format, XMVECTOR* pSource, size_t count, float threshold, size_t y, size_t z, XMVECTOR* pDiffusionErrors ) { assert( pDestination && size > 0 ); assert( pSource && count > 0 && (((uintptr_t)pSource & 0xF) == 0) ); assert( IsValid(format) && !IsVideo(format) && !IsTypeless(format) && !IsCompressed(format) ); XMVECTOR ordered[4]; if ( pDiffusionErrors ) { // If pDiffusionErrors != 0, then this function performs error diffusion dithering (aka Floyd-Steinberg dithering) // To avoid the need for another temporary scanline buffer, we allow this function to overwrite the source buffer in-place // Given the intended usage in the conversion routines, this is not a problem. XMVECTOR* ptr = pSource; const XMVECTOR* err = pDiffusionErrors + 1; for( size_t i=0; i < count; ++i ) { // Add contribution from previous scanline XMVECTOR v = XMVectorAdd( *ptr, *err++ ); *ptr++ = v; } // Reset errors for next scanline memset( pDiffusionErrors, 0, sizeof(XMVECTOR)*(count+2) ); } else { // If pDiffusionErrors == 0, then this function performs ordered dithering XMVECTOR dither = XMLoadFloat4( reinterpret_cast( g_Dither + (z & 3) + ( (y & 3) * 8 ) ) ); ordered[0] = XMVectorSplatX( dither ); ordered[1] = XMVectorSplatY( dither ); ordered[2] = XMVectorSplatZ( dither ); ordered[3] = XMVectorSplatW( dither ); } const XMVECTOR* __restrict sPtr = pSource; if ( !sPtr ) return false; XMVECTOR vError = XMVectorZero(); switch( format ) { case DXGI_FORMAT_R16G16B16A16_UNORM: STORE_SCANLINE( XMUSHORTN4, g_Scale16pc, true, true, uint16_t, 0xFFFF, y, false ) case DXGI_FORMAT_R16G16B16A16_UINT: STORE_SCANLINE( XMUSHORT4, g_Scale16pc, true, false, uint16_t, 0xFFFF, y, false ) case DXGI_FORMAT_R16G16B16A16_SNORM: STORE_SCANLINE( XMSHORTN4, g_Scale15pc, false, true, int16_t, 0xFFFF, y, false ) case DXGI_FORMAT_R16G16B16A16_SINT: STORE_SCANLINE( XMSHORT4, g_Scale15pc, false, false, int16_t, 0xFFFF, y, false ) case DXGI_FORMAT_R10G10B10A2_UNORM: STORE_SCANLINE( XMUDECN4, g_Scale10pc, true, true, uint16_t, 0x3FF, y, false ) case DXGI_FORMAT_R10G10B10A2_UINT: STORE_SCANLINE( XMUDEC4, g_Scale10pc, true, false, uint16_t, 0x3FF, y, false ) case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM: if ( size >= sizeof(XMUDEC4) ) { static const XMVECTORF32 Scale = { 510.0f, 510.0f, 510.0f, 3.0f }; static const XMVECTORF32 Bias = { 384.0f, 384.0f, 384.0f, 0.0f }; static const XMVECTORF32 MinXR = { -0.7529f, -0.7529f, -0.7529f, 0.f }; static const XMVECTORF32 MaxXR = { 1.2529f, 1.2529f, 1.2529f, 1.0f }; XMUDEC4 * __restrict dest = reinterpret_cast(pDestination); for( size_t i = 0; i < count; ++i ) { ptrdiff_t index = static_cast( ( y & 1 ) ? ( count - i - 1 ) : i ); ptrdiff_t delta = ( y & 1 ) ? -2 : 0; XMVECTOR v = XMVectorClamp( sPtr[ index ], MinXR, MaxXR ); v = XMVectorMultiplyAdd( v, Scale, vError ); XMVECTOR target; if ( pDiffusionErrors ) { target = XMVectorRound( v ); vError = XMVectorSubtract( v, target ); vError = XMVectorDivide( vError, Scale ); // Distribute error to next scanline and next pixel pDiffusionErrors[ index-delta ] += XMVectorMultiply( g_ErrorWeight3, vError ); pDiffusionErrors[ index+1 ] += XMVectorMultiply( g_ErrorWeight5, vError ); pDiffusionErrors[ index+2+delta ] += XMVectorMultiply( g_ErrorWeight1, vError ); vError = XMVectorMultiply( vError, g_ErrorWeight7 ); } else { // Applied ordered dither target = XMVectorAdd( v, ordered[ index & 3 ] ); target = XMVectorRound( v ); } target = XMVectorAdd( target, Bias ); target = XMVectorClamp( target, g_XMZero, g_Scale10pc ); XMFLOAT4A tmp; XMStoreFloat4A( &tmp, target ); auto dPtr = &dest[ index ]; dPtr->x = static_cast( tmp.x ) & 0x3FF; dPtr->y = static_cast( tmp.y ) & 0x3FF; dPtr->z = static_cast( tmp.z ) & 0x3FF; dPtr->w = static_cast( tmp.w ); } return true; } return false; case DXGI_FORMAT_R8G8B8A8_UNORM: case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: STORE_SCANLINE( XMUBYTEN4, g_Scale8pc, true, true, uint8_t, 0xFF, y, false ) case DXGI_FORMAT_R8G8B8A8_UINT: STORE_SCANLINE( XMUBYTE4, g_Scale8pc, true, false, uint8_t, 0xFF, y, false ) case DXGI_FORMAT_R8G8B8A8_SNORM: STORE_SCANLINE( XMBYTEN4, g_Scale7pc, false, true, int8_t, 0xFF, y, false ) case DXGI_FORMAT_R8G8B8A8_SINT: STORE_SCANLINE( XMBYTE4, g_Scale7pc, false, false, int8_t, 0xFF, y, false ) case DXGI_FORMAT_R16G16_UNORM: STORE_SCANLINE2( XMUSHORTN2, g_Scale16pc, true, true, uint16_t, 0xFFFF, y ) case DXGI_FORMAT_R16G16_UINT: STORE_SCANLINE2( XMUSHORT2, g_Scale16pc, true, false, uint16_t, 0xFFFF, y ) case DXGI_FORMAT_R16G16_SNORM: STORE_SCANLINE2( XMSHORTN2, g_Scale15pc, false, true, int16_t, 0xFFFF, y ) case DXGI_FORMAT_R16G16_SINT: STORE_SCANLINE2( XMSHORT2, g_Scale15pc, false, false, int16_t, 0xFFFF, y ) case DXGI_FORMAT_D24_UNORM_S8_UINT: if ( size >= sizeof(uint32_t) ) { static const XMVECTORF32 Clamp = { 1.f, 255.f, 0.f, 0.f }; static const XMVECTORF32 Scale = { 16777215.f, 1.f, 0.f, 0.f }; static const XMVECTORF32 Scale2 = { 16777215.f, 255.f, 0.f, 0.f }; uint32_t * __restrict dest = reinterpret_cast(pDestination); for( size_t i = 0; i < count; ++i ) { ptrdiff_t index = static_cast( ( y & 1 ) ? ( count - i - 1 ) : i ); ptrdiff_t delta = ( y & 1 ) ? -2 : 0; XMVECTOR v = XMVectorClamp( sPtr[ index ], g_XMZero, Clamp ); v = XMVectorAdd( v, vError ); v = XMVectorMultiply( v, Scale ); XMVECTOR target; if ( pDiffusionErrors ) { target = XMVectorRound( v ); vError = XMVectorSubtract( v, target ); vError = XMVectorDivide( vError, Scale ); // Distribute error to next scanline and next pixel pDiffusionErrors[ index-delta ] += XMVectorMultiply( g_ErrorWeight3, vError ); pDiffusionErrors[ index+1 ] += XMVectorMultiply( g_ErrorWeight5, vError ); pDiffusionErrors[ index+2+delta ] += XMVectorMultiply( g_ErrorWeight1, vError ); vError = XMVectorMultiply( vError, g_ErrorWeight7 ); } else { // Applied ordered dither target = XMVectorAdd( v, ordered[ index & 3 ] ); target = XMVectorRound( v ); } target = XMVectorClamp( target, g_XMZero, Scale2 ); XMFLOAT4A tmp; XMStoreFloat4A( &tmp, target ); auto dPtr = &dest[ index ]; *dPtr = (static_cast( tmp.x ) & 0xFFFFFF) | ((static_cast( tmp.y ) & 0xFF) << 24); } return true; } return false; case DXGI_FORMAT_R8G8_UNORM: STORE_SCANLINE2( XMUBYTEN2, g_Scale8pc, true, true, uint8_t, 0xFF, y ) case DXGI_FORMAT_R8G8_UINT: STORE_SCANLINE2( XMUBYTE2, g_Scale8pc, true, false, uint8_t, 0xFF, y ) case DXGI_FORMAT_R8G8_SNORM: STORE_SCANLINE2( XMBYTEN2, g_Scale7pc, false, true, int8_t, 0xFF, y ) case DXGI_FORMAT_R8G8_SINT: STORE_SCANLINE2( XMBYTE2, g_Scale7pc, false, false, int8_t, 0xFF, y ) case DXGI_FORMAT_D16_UNORM: case DXGI_FORMAT_R16_UNORM: STORE_SCANLINE1( uint16_t, g_Scale16pc, true, true, 0xFFFF, y, false ) case DXGI_FORMAT_R16_UINT: STORE_SCANLINE1( uint16_t, g_Scale16pc, true, false, 0xFFFF, y, false ) case DXGI_FORMAT_R16_SNORM: STORE_SCANLINE1( int16_t, g_Scale15pc, false, true, 0xFFFF, y, false ) case DXGI_FORMAT_R16_SINT: STORE_SCANLINE1( int16_t, g_Scale15pc, false, false, 0xFFFF, y, false ) case DXGI_FORMAT_R8_UNORM: STORE_SCANLINE1( uint8_t, g_Scale8pc, true, true, 0xFF, y, false ) case DXGI_FORMAT_R8_UINT: STORE_SCANLINE1( uint8_t, g_Scale8pc, true, false, 0xFF, y, false ) case DXGI_FORMAT_R8_SNORM: STORE_SCANLINE1( int8_t, g_Scale7pc, false, true, 0xFF, y, false ) case DXGI_FORMAT_R8_SINT: STORE_SCANLINE1( int8_t, g_Scale7pc, false, false, 0xFF, y, false ) case DXGI_FORMAT_A8_UNORM: STORE_SCANLINE1( uint8_t, g_Scale8pc, true, true, 0xFF, y, true ) case DXGI_FORMAT_B5G6R5_UNORM: if ( size >= sizeof(XMU565) ) { XMU565 * __restrict dest = reinterpret_cast(pDestination); for( size_t i = 0; i < count; ++i ) { ptrdiff_t index = static_cast( ( y & 1 ) ? ( count - i - 1 ) : i ); ptrdiff_t delta = ( y & 1 ) ? -2 : 0; XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>( sPtr[ index ] ); v = XMVectorSaturate( v ); v = XMVectorAdd( v, vError ); v = XMVectorMultiply( v, g_Scale565pc ); XMVECTOR target; if ( pDiffusionErrors ) { target = XMVectorRound( v ); vError = XMVectorSubtract( v, target ); vError = XMVectorDivide( vError, g_Scale565pc ); // Distribute error to next scanline and next pixel pDiffusionErrors[ index-delta ] += XMVectorMultiply( g_ErrorWeight3, vError ); pDiffusionErrors[ index+1 ] += XMVectorMultiply( g_ErrorWeight5, vError ); pDiffusionErrors[ index+2+delta ] += XMVectorMultiply( g_ErrorWeight1, vError ); vError = XMVectorMultiply( vError, g_ErrorWeight7 ); } else { // Applied ordered dither target = XMVectorAdd( v, ordered[ index & 3 ] ); target = XMVectorRound( v ); } target = XMVectorClamp( target, g_XMZero, g_Scale565pc ); XMFLOAT4A tmp; XMStoreFloat4A( &tmp, target ); auto dPtr = &dest[ index ]; dPtr->x = static_cast( tmp.x ) & 0x1F; dPtr->y = static_cast( tmp.y ) & 0x3F; dPtr->z = static_cast( tmp.z ) & 0x1F; } return true; } return false; case DXGI_FORMAT_B5G5R5A1_UNORM: if ( size >= sizeof(XMU555) ) { XMU555 * __restrict dest = reinterpret_cast(pDestination); for( size_t i = 0; i < count; ++i ) { ptrdiff_t index = static_cast( ( y & 1 ) ? ( count - i - 1 ) : i ); ptrdiff_t delta = ( y & 1 ) ? -2 : 0; XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>( sPtr[ index ] ); v = XMVectorSaturate( v ); v = XMVectorAdd( v, vError ); v = XMVectorMultiply( v, g_Scale5551pc ); XMVECTOR target; if ( pDiffusionErrors ) { target = XMVectorRound( v ); vError = XMVectorSubtract( v, target ); vError = XMVectorDivide( vError, g_Scale5551pc ); // Distribute error to next scanline and next pixel pDiffusionErrors[ index-delta ] += XMVectorMultiply( g_ErrorWeight3, vError ); pDiffusionErrors[ index+1 ] += XMVectorMultiply( g_ErrorWeight5, vError ); pDiffusionErrors[ index+2+delta ] += XMVectorMultiply( g_ErrorWeight1, vError ); vError = XMVectorMultiply( vError, g_ErrorWeight7 ); } else { // Applied ordered dither target = XMVectorAdd( v, ordered[ index & 3 ] ); target = XMVectorRound( v ); } target = XMVectorClamp( target, g_XMZero, g_Scale5551pc ); XMFLOAT4A tmp; XMStoreFloat4A( &tmp, target ); auto dPtr = &dest[ index ]; dPtr->x = static_cast( tmp.x ) & 0x1F; dPtr->y = static_cast( tmp.y ) & 0x1F; dPtr->z = static_cast( tmp.z ) & 0x1F; dPtr->w = ( XMVectorGetW( target ) > threshold ) ? 1 : 0; } return true; } return false; case DXGI_FORMAT_B8G8R8A8_UNORM: case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: STORE_SCANLINE( XMUBYTEN4, g_Scale8pc, true, true, uint8_t, 0xFF, y, true ) case DXGI_FORMAT_B8G8R8X8_UNORM: case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB: if ( size >= sizeof(XMUBYTEN4) ) { XMUBYTEN4 * __restrict dest = reinterpret_cast(pDestination); for( size_t i = 0; i < count; ++i ) { ptrdiff_t index = static_cast( ( y & 1 ) ? ( count - i - 1 ) : i ); ptrdiff_t delta = ( y & 1 ) ? -2 : 0; XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>( sPtr[ index ] ); v = XMVectorSaturate( v ); v = XMVectorAdd( v, vError ); v = XMVectorMultiply( v, g_Scale8pc ); XMVECTOR target; if ( pDiffusionErrors ) { target = XMVectorRound( v ); vError = XMVectorSubtract( v, target ); vError = XMVectorDivide( vError, g_Scale8pc ); // Distribute error to next scanline and next pixel pDiffusionErrors[ index-delta ] += XMVectorMultiply( g_ErrorWeight3, vError ); pDiffusionErrors[ index+1 ] += XMVectorMultiply( g_ErrorWeight5, vError ); pDiffusionErrors[ index+2+delta ] += XMVectorMultiply( g_ErrorWeight1, vError ); vError = XMVectorMultiply( vError, g_ErrorWeight7 ); } else { // Applied ordered dither target = XMVectorAdd( v, ordered[ index & 3 ] ); target = XMVectorRound( v ); } target = XMVectorClamp( target, g_XMZero, g_Scale8pc ); XMFLOAT4A tmp; XMStoreFloat4A( &tmp, target ); auto dPtr = &dest[ index ]; dPtr->x = static_cast( tmp.x ) & 0xFF; dPtr->y = static_cast( tmp.y ) & 0xFF; dPtr->z = static_cast( tmp.z ) & 0xFF; dPtr->w = 0; } return true; } return false; #ifdef DXGI_1_2_FORMATS case DXGI_FORMAT_B4G4R4A4_UNORM: STORE_SCANLINE( XMUNIBBLE4, g_Scale4pc, true, true, uint8_t, 0xF, y, true ) #endif default: return _StoreScanline( pDestination, size, format, pSource, count, threshold ); } } #pragma warning(pop) #undef STORE_SCANLINE #undef STORE_SCANLINE2 #undef STORE_SCANLINE1 //------------------------------------------------------------------------------------- // Selection logic for using WIC vs. our own routines //------------------------------------------------------------------------------------- static inline bool _UseWICConversion( _In_ DWORD filter, _In_ DXGI_FORMAT sformat, _In_ DXGI_FORMAT tformat, _Out_ WICPixelFormatGUID& pfGUID, _Out_ WICPixelFormatGUID& targetGUID ) { memcpy( &pfGUID, &GUID_NULL, sizeof(GUID) ); memcpy( &targetGUID, &GUID_NULL, sizeof(GUID) ); if ( filter & TEX_FILTER_FORCE_NON_WIC ) { // Explicit flag indicates use of non-WIC code paths return false; } if ( !_DXGIToWIC( sformat, pfGUID ) || !_DXGIToWIC( tformat, targetGUID ) ) { // Source or target format are not WIC supported native pixel formats return false; } if ( filter & TEX_FILTER_FORCE_WIC ) { // Explicit flag to use WIC code paths, skips all the case checks below return true; } if ( filter & TEX_FILTER_SEPARATE_ALPHA ) { // Alpha is not premultiplied, so use non-WIC code paths return false; } // Check for special cases switch ( sformat ) { case DXGI_FORMAT_R32G32B32A32_FLOAT: case DXGI_FORMAT_R32G32B32_FLOAT: case DXGI_FORMAT_R16G16B16A16_FLOAT: switch( tformat ) { case DXGI_FORMAT_R16_FLOAT: case DXGI_FORMAT_R32_FLOAT: case DXGI_FORMAT_D32_FLOAT: // WIC converts via UNORM formats and ends up converting colorspaces for these cases case DXGI_FORMAT_A8_UNORM: // Conversion logic for these kinds of textures is unintuitive for WIC code paths return false; } break; case DXGI_FORMAT_R16_FLOAT: switch( tformat ) { case DXGI_FORMAT_R32_FLOAT: case DXGI_FORMAT_D32_FLOAT: // WIC converts via UNORM formats and ends up converting colorspaces for these cases case DXGI_FORMAT_A8_UNORM: // Conversion logic for these kinds of textures is unintuitive for WIC code paths return false; } break; case DXGI_FORMAT_A8_UNORM: // Conversion logic for these kinds of textures is unintuitive for WIC code paths return false; default: switch( tformat ) { case DXGI_FORMAT_A8_UNORM: // Conversion logic for these kinds of textures is unintuitive for WIC code paths return false; } } // Check for implicit color space changes if ( IsSRGB( sformat ) ) filter |= TEX_FILTER_SRGB_IN; if ( IsSRGB( tformat ) ) filter |= TEX_FILTER_SRGB_OUT; if ( (filter & (TEX_FILTER_SRGB_IN|TEX_FILTER_SRGB_OUT)) == (TEX_FILTER_SRGB_IN|TEX_FILTER_SRGB_OUT) ) { filter &= ~(TEX_FILTER_SRGB_IN|TEX_FILTER_SRGB_OUT); } DWORD wicsrgb = _CheckWICColorSpace( pfGUID, targetGUID ); if ( wicsrgb != (filter & (TEX_FILTER_SRGB_IN|TEX_FILTER_SRGB_OUT)) ) { // WIC will perform a colorspace conversion we didn't request return false; } return true; } //------------------------------------------------------------------------------------- // Convert the source image using WIC //------------------------------------------------------------------------------------- static HRESULT _ConvertUsingWIC( _In_ const Image& srcImage, _In_ const WICPixelFormatGUID& pfGUID, _In_ const WICPixelFormatGUID& targetGUID, _In_ DWORD filter, _In_ float threshold, _In_ const Image& destImage ) { assert( srcImage.width == destImage.width ); assert( srcImage.height == destImage.height ); IWICImagingFactory* pWIC = _GetWIC(); if ( !pWIC ) return E_NOINTERFACE; ScopedObject FC; HRESULT hr = pWIC->CreateFormatConverter( &FC ); if ( FAILED(hr) ) return hr; // Note that WIC conversion ignores the TEX_FILTER_SRGB_IN and TEX_FILTER_SRGB_OUT flags, // but also always assumes UNORM <-> FLOAT conversions are changing color spaces sRGB <-> scRGB BOOL canConvert = FALSE; hr = FC->CanConvert( pfGUID, targetGUID, &canConvert ); if ( FAILED(hr) || !canConvert ) { // This case is not an issue for the subset of WIC formats that map directly to DXGI return E_UNEXPECTED; } ScopedObject source; hr = pWIC->CreateBitmapFromMemory( static_cast( srcImage.width ), static_cast( srcImage.height ), pfGUID, static_cast( srcImage.rowPitch ), static_cast( srcImage.slicePitch ), srcImage.pixels, &source ); if ( FAILED(hr) ) return hr; hr = FC->Initialize( source.Get(), targetGUID, _GetWICDither( filter ), 0, threshold * 100.f, WICBitmapPaletteTypeCustom ); if ( FAILED(hr) ) return hr; hr = FC->CopyPixels( 0, static_cast( destImage.rowPitch ), static_cast( destImage.slicePitch ), destImage.pixels ); if ( FAILED(hr) ) return hr; return S_OK; } //------------------------------------------------------------------------------------- // Convert the source image (not using WIC) //------------------------------------------------------------------------------------- static HRESULT _Convert( _In_ const Image& srcImage, _In_ DWORD filter, _In_ const Image& destImage, _In_ float threshold, _In_ size_t z ) { assert( srcImage.width == destImage.width ); assert( srcImage.height == destImage.height ); const uint8_t *pSrc = srcImage.pixels; uint8_t *pDest = destImage.pixels; if ( !pSrc || !pDest ) return E_POINTER; size_t width = srcImage.width; if ( filter & TEX_FILTER_DITHER_DIFFUSION ) { // Error diffusion dithering (aka Floyd-Steinberg dithering) ScopedAlignedArrayXMVECTOR scanline( reinterpret_cast( _aligned_malloc( (sizeof(XMVECTOR)*(width*2 + 2)), 16 ) ) ); if ( !scanline ) return E_OUTOFMEMORY; XMVECTOR* pDiffusionErrors = scanline.get() + width; memset( pDiffusionErrors, 0, sizeof(XMVECTOR)*(width+2) ); for( size_t h = 0; h < srcImage.height; ++h ) { if ( !_LoadScanline( scanline.get(), width, pSrc, srcImage.rowPitch, srcImage.format ) ) return E_FAIL; _ConvertScanline( scanline.get(), width, destImage.format, srcImage.format, filter ); if ( !_StoreScanlineDither( pDest, destImage.rowPitch, destImage.format, scanline.get(), width, threshold, h, z, pDiffusionErrors ) ) return E_FAIL; pSrc += srcImage.rowPitch; pDest += destImage.rowPitch; } } else { ScopedAlignedArrayXMVECTOR scanline( reinterpret_cast( _aligned_malloc( (sizeof(XMVECTOR)*width), 16 ) ) ); if ( !scanline ) return E_OUTOFMEMORY; if ( filter & TEX_FILTER_DITHER ) { // Ordered dithering for( size_t h = 0; h < srcImage.height; ++h ) { if ( !_LoadScanline( scanline.get(), width, pSrc, srcImage.rowPitch, srcImage.format ) ) return E_FAIL; _ConvertScanline( scanline.get(), width, destImage.format, srcImage.format, filter ); if ( !_StoreScanlineDither( pDest, destImage.rowPitch, destImage.format, scanline.get(), width, threshold, h, z, nullptr ) ) return E_FAIL; pSrc += srcImage.rowPitch; pDest += destImage.rowPitch; } } else { // No dithering for( size_t h = 0; h < srcImage.height; ++h ) { if ( !_LoadScanline( scanline.get(), width, pSrc, srcImage.rowPitch, srcImage.format ) ) return E_FAIL; _ConvertScanline( scanline.get(), width, destImage.format, srcImage.format, filter ); if ( !_StoreScanline( pDest, destImage.rowPitch, destImage.format, scanline.get(), width, threshold ) ) return E_FAIL; pSrc += srcImage.rowPitch; pDest += destImage.rowPitch; } } } return S_OK; } //===================================================================================== // Entry-points //===================================================================================== //------------------------------------------------------------------------------------- // Convert image //------------------------------------------------------------------------------------- _Use_decl_annotations_ HRESULT Convert( const Image& srcImage, DXGI_FORMAT format, DWORD filter, float threshold, ScratchImage& image ) { if ( (srcImage.format == format) || !IsValid( format ) ) return E_INVALIDARG; if ( !srcImage.pixels ) return E_POINTER; if ( IsCompressed(srcImage.format) || IsCompressed(format) || IsVideo(srcImage.format) || IsVideo(format) || IsTypeless(srcImage.format) || IsTypeless(format) ) return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); #ifdef _M_X64 if ( (srcImage.width > 0xFFFFFFFF) || (srcImage.height > 0xFFFFFFFF) ) return E_INVALIDARG; #endif HRESULT hr = image.Initialize2D( format, srcImage.width, srcImage.height, 1, 1 ); if ( FAILED(hr) ) return hr; const Image *rimage = image.GetImage( 0, 0, 0 ); if ( !rimage ) { image.Release(); return E_POINTER; } WICPixelFormatGUID pfGUID, targetGUID; if ( _UseWICConversion( filter, srcImage.format, format, pfGUID, targetGUID ) ) { hr = _ConvertUsingWIC( srcImage, pfGUID, targetGUID, filter, threshold, *rimage ); } else { hr = _Convert( srcImage, filter, *rimage, threshold, 0 ); } if ( FAILED(hr) ) { image.Release(); return hr; } return S_OK; } //------------------------------------------------------------------------------------- // Convert image (complex) //------------------------------------------------------------------------------------- _Use_decl_annotations_ HRESULT Convert( const Image* srcImages, size_t nimages, const TexMetadata& metadata, DXGI_FORMAT format, DWORD filter, float threshold, ScratchImage& result ) { if ( !srcImages || !nimages || (metadata.format == format) || !IsValid(format) ) return E_INVALIDARG; if ( IsCompressed(metadata.format) || IsCompressed(format) || IsVideo(metadata.format) || IsVideo(format) || IsTypeless(metadata.format) || IsTypeless(format) ) return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); #ifdef _M_X64 if ( (metadata.width > 0xFFFFFFFF) || (metadata.height > 0xFFFFFFFF) ) return E_INVALIDARG; #endif TexMetadata mdata2 = metadata; mdata2.format = format; HRESULT hr = result.Initialize( mdata2 ); if ( FAILED(hr) ) return hr; if ( nimages != result.GetImageCount() ) { result.Release(); return E_FAIL; } const Image* dest = result.GetImages(); if ( !dest ) { result.Release(); return E_POINTER; } WICPixelFormatGUID pfGUID, targetGUID; bool usewic = _UseWICConversion( filter, metadata.format, format, pfGUID, targetGUID ); switch (metadata.dimension) { case TEX_DIMENSION_TEXTURE1D: case TEX_DIMENSION_TEXTURE2D: for( size_t index=0; index < nimages; ++index ) { const Image& src = srcImages[ index ]; if ( src.format != metadata.format ) { result.Release(); return E_FAIL; } #ifdef _M_X64 if ( (src.width > 0xFFFFFFFF) || (src.height > 0xFFFFFFFF) ) return E_FAIL; #endif const Image& dst = dest[ index ]; assert( dst.format == format ); if ( src.width != dst.width || src.height != dst.height ) { result.Release(); return E_FAIL; } if ( usewic ) { hr = _ConvertUsingWIC( src, pfGUID, targetGUID, filter, threshold, dst ); } else { hr = _Convert( src, filter, dst, threshold, 0 ); } if ( FAILED(hr) ) { result.Release(); return hr; } } break; case TEX_DIMENSION_TEXTURE3D: { size_t index = 0; size_t d = metadata.depth; for( size_t level = 0; level < metadata.mipLevels; ++level ) { for( size_t slice = 0; slice < d; ++slice, ++index ) { if ( index >= nimages ) return E_FAIL; const Image& src = srcImages[ index ]; if ( src.format != metadata.format ) { result.Release(); return E_FAIL; } #ifdef _M_X64 if ( (src.width > 0xFFFFFFFF) || (src.height > 0xFFFFFFFF) ) return E_FAIL; #endif const Image& dst = dest[ index ]; assert( dst.format == format ); if ( src.width != dst.width || src.height != dst.height ) { result.Release(); return E_FAIL; } if ( usewic ) { hr = _ConvertUsingWIC( src, pfGUID, targetGUID, filter, threshold, dst ); } else { hr = _Convert( src, filter, dst, threshold, slice ); } if ( FAILED(hr) ) { result.Release(); return hr; } } if ( d > 1 ) d >>= 1; } } break; default: return E_FAIL; } return S_OK; } }; // namespace