//------------------------------------------------------------------------------------- // BC.h // // Block-compression (BC) functionality // // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. // // http://go.microsoft.com/fwlink/?LinkId=248926 //------------------------------------------------------------------------------------- #pragma once #include #include #include namespace DirectX { //------------------------------------------------------------------------------------- // Macros //------------------------------------------------------------------------------------- // Because these are used in SAL annotations, they need to remain macros rather than const values #define NUM_PIXELS_PER_BLOCK 16 //------------------------------------------------------------------------------------- // Constants //------------------------------------------------------------------------------------- enum BC_FLAGS : uint32_t { BC_FLAGS_NONE = 0x0, BC_FLAGS_DITHER_RGB = 0x10000, // Enables dithering for RGB colors for BC1-3 BC_FLAGS_DITHER_A = 0x20000, // Enables dithering for Alpha channel for BC1-3 BC_FLAGS_UNIFORM = 0x40000, // By default, uses perceptual weighting for BC1-3; this flag makes it a uniform weighting BC_FLAGS_USE_3SUBSETS = 0x80000, // By default, BC7 skips mode 0 & 2; this flag adds those modes back BC_FLAGS_FORCE_BC7_MODE6 = 0x100000, // BC7 should only use mode 6; skip other modes }; //------------------------------------------------------------------------------------- // Structures //------------------------------------------------------------------------------------- class LDRColorA; class HDRColorA { public: float r, g, b, a; public: HDRColorA() = default; HDRColorA(float _r, float _g, float _b, float _a) noexcept : r(_r), g(_g), b(_b), a(_a) {} HDRColorA(const HDRColorA& c) noexcept : r(c.r), g(c.g), b(c.b), a(c.a) {} // binary operators HDRColorA operator + (const HDRColorA& c) const noexcept { return HDRColorA(r + c.r, g + c.g, b + c.b, a + c.a); } HDRColorA operator - (const HDRColorA& c) const noexcept { return HDRColorA(r - c.r, g - c.g, b - c.b, a - c.a); } HDRColorA operator * (float f) const noexcept { return HDRColorA(r * f, g * f, b * f, a * f); } HDRColorA operator / (float f) const noexcept { float fInv = 1.0f / f; return HDRColorA(r * fInv, g * fInv, b * fInv, a * fInv); } float operator * (const HDRColorA& c) const noexcept { return r * c.r + g * c.g + b * c.b + a * c.a; } // assignment operators HDRColorA& operator += (const HDRColorA& c) noexcept { r += c.r; g += c.g; b += c.b; a += c.a; return *this; } HDRColorA& operator -= (const HDRColorA& c) noexcept { r -= c.r; g -= c.g; b -= c.b; a -= c.a; return *this; } HDRColorA& operator *= (float f) noexcept { r *= f; g *= f; b *= f; a *= f; return *this; } HDRColorA& operator /= (float f) noexcept { float fInv = 1.0f / f; r *= fInv; g *= fInv; b *= fInv; a *= fInv; return *this; } HDRColorA& Clamp(_In_ float fMin, _In_ float fMax) noexcept { r = std::min(fMax, std::max(fMin, r)); g = std::min(fMax, std::max(fMin, g)); b = std::min(fMax, std::max(fMin, b)); a = std::min(fMax, std::max(fMin, a)); return *this; } HDRColorA(const LDRColorA& c) noexcept; HDRColorA& operator = (const LDRColorA& c) noexcept; LDRColorA ToLDRColorA() const noexcept; }; inline HDRColorA* HDRColorALerp(_Out_ HDRColorA *pOut, _In_ const HDRColorA *pC1, _In_ const HDRColorA *pC2, _In_ float s) noexcept { pOut->r = pC1->r + s * (pC2->r - pC1->r); pOut->g = pC1->g + s * (pC2->g - pC1->g); pOut->b = pC1->b + s * (pC2->b - pC1->b); pOut->a = pC1->a + s * (pC2->a - pC1->a); return pOut; } #pragma pack(push,1) // BC1/DXT1 compression (4 bits per texel) struct D3DX_BC1 { uint16_t rgb[2]; // 565 colors uint32_t bitmap; // 2bpp rgb bitmap }; // BC2/DXT2/3 compression (8 bits per texel) struct D3DX_BC2 { uint32_t bitmap[2]; // 4bpp alpha bitmap D3DX_BC1 bc1; // BC1 rgb data }; // BC3/DXT4/5 compression (8 bits per texel) struct D3DX_BC3 { uint8_t alpha[2]; // alpha values uint8_t bitmap[6]; // 3bpp alpha bitmap D3DX_BC1 bc1; // BC1 rgb data }; #pragma pack(pop) //------------------------------------------------------------------------------------- // Templates //------------------------------------------------------------------------------------- #pragma warning(push) #pragma warning(disable : 4127) template void OptimizeAlpha(float *pX, float *pY, const float *pPoints, uint32_t cSteps) noexcept { static const float pC6[] = { 5.0f / 5.0f, 4.0f / 5.0f, 3.0f / 5.0f, 2.0f / 5.0f, 1.0f / 5.0f, 0.0f / 5.0f }; static const float pD6[] = { 0.0f / 5.0f, 1.0f / 5.0f, 2.0f / 5.0f, 3.0f / 5.0f, 4.0f / 5.0f, 5.0f / 5.0f }; static const float pC8[] = { 7.0f / 7.0f, 6.0f / 7.0f, 5.0f / 7.0f, 4.0f / 7.0f, 3.0f / 7.0f, 2.0f / 7.0f, 1.0f / 7.0f, 0.0f / 7.0f }; static const float pD8[] = { 0.0f / 7.0f, 1.0f / 7.0f, 2.0f / 7.0f, 3.0f / 7.0f, 4.0f / 7.0f, 5.0f / 7.0f, 6.0f / 7.0f, 7.0f / 7.0f }; const float *pC = (6 == cSteps) ? pC6 : pC8; const float *pD = (6 == cSteps) ? pD6 : pD8; const float MAX_VALUE = 1.0f; const float MIN_VALUE = (bRange) ? -1.0f : 0.0f; // Find Min and Max points, as starting point float fX = MAX_VALUE; float fY = MIN_VALUE; if (8 == cSteps) { for (size_t iPoint = 0; iPoint < NUM_PIXELS_PER_BLOCK; iPoint++) { if (pPoints[iPoint] < fX) fX = pPoints[iPoint]; if (pPoints[iPoint] > fY) fY = pPoints[iPoint]; } } else { for (size_t iPoint = 0; iPoint < NUM_PIXELS_PER_BLOCK; iPoint++) { if (pPoints[iPoint] < fX && pPoints[iPoint] > MIN_VALUE) fX = pPoints[iPoint]; if (pPoints[iPoint] > fY && pPoints[iPoint] < MAX_VALUE) fY = pPoints[iPoint]; } if (fX == fY) { fY = MAX_VALUE; } } // Use Newton's Method to find local minima of sum-of-squares error. auto fSteps = static_cast(cSteps - 1); for (size_t iIteration = 0; iIteration < 8; iIteration++) { if ((fY - fX) < (1.0f / 256.0f)) break; float fScale = fSteps / (fY - fX); // Calculate new steps float pSteps[8]; for (size_t iStep = 0; iStep < cSteps; iStep++) pSteps[iStep] = pC[iStep] * fX + pD[iStep] * fY; if (6 == cSteps) { pSteps[6] = MIN_VALUE; pSteps[7] = MAX_VALUE; } // Evaluate function, and derivatives float dX = 0.0f; float dY = 0.0f; float d2X = 0.0f; float d2Y = 0.0f; for (size_t iPoint = 0; iPoint < NUM_PIXELS_PER_BLOCK; iPoint++) { float fDot = (pPoints[iPoint] - fX) * fScale; uint32_t iStep; if (fDot <= 0.0f) { // D3DX10 / D3DX11 didn't take into account the proper minimum value for the bRange (BC4S/BC5S) case iStep = ((6 == cSteps) && (pPoints[iPoint] <= (fX + MIN_VALUE) * 0.5f)) ? 6u : 0u; } else if (fDot >= fSteps) { iStep = ((6 == cSteps) && (pPoints[iPoint] >= (fY + MAX_VALUE) * 0.5f)) ? 7u : (cSteps - 1); } else { iStep = uint32_t(fDot + 0.5f); } if (iStep < cSteps) { // D3DX had this computation backwards (pPoints[iPoint] - pSteps[iStep]) // this fix improves RMS of the alpha component float fDiff = pSteps[iStep] - pPoints[iPoint]; dX += pC[iStep] * fDiff; d2X += pC[iStep] * pC[iStep]; dY += pD[iStep] * fDiff; d2Y += pD[iStep] * pD[iStep]; } } // Move endpoints if (d2X > 0.0f) fX -= dX / d2X; if (d2Y > 0.0f) fY -= dY / d2Y; if (fX > fY) { float f = fX; fX = fY; fY = f; } if ((dX * dX < (1.0f / 64.0f)) && (dY * dY < (1.0f / 64.0f))) break; } *pX = (fX < MIN_VALUE) ? MIN_VALUE : (fX > MAX_VALUE) ? MAX_VALUE : fX; *pY = (fY < MIN_VALUE) ? MIN_VALUE : (fY > MAX_VALUE) ? MAX_VALUE : fY; } #pragma warning(pop) //------------------------------------------------------------------------------------- // Functions //------------------------------------------------------------------------------------- typedef void (*BC_DECODE)(XMVECTOR *pColor, const uint8_t *pBC); typedef void (*BC_ENCODE)(uint8_t *pDXT, const XMVECTOR *pColor, uint32_t flags); void D3DXDecodeBC1(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(8) const uint8_t *pBC) noexcept; void D3DXDecodeBC2(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC) noexcept; void D3DXDecodeBC3(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC) noexcept; void D3DXDecodeBC4U(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(8) const uint8_t *pBC) noexcept; void D3DXDecodeBC4S(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(8) const uint8_t *pBC) noexcept; void D3DXDecodeBC5U(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC) noexcept; void D3DXDecodeBC5S(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC) noexcept; void D3DXDecodeBC6HU(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC) noexcept; void D3DXDecodeBC6HS(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC) noexcept; void D3DXDecodeBC7(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC) noexcept; void D3DXEncodeBC1(_Out_writes_(8) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ float threshold, _In_ uint32_t flags) noexcept; // BC1 requires one additional parameter, so it doesn't match signature of BC_ENCODE above void D3DXEncodeBC2(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ uint32_t flags) noexcept; void D3DXEncodeBC3(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ uint32_t flags) noexcept; void D3DXEncodeBC4U(_Out_writes_(8) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ uint32_t flags) noexcept; void D3DXEncodeBC4S(_Out_writes_(8) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ uint32_t flags) noexcept; void D3DXEncodeBC5U(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ uint32_t flags) noexcept; void D3DXEncodeBC5S(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ uint32_t flags) noexcept; void D3DXEncodeBC6HU(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ uint32_t flags) noexcept; void D3DXEncodeBC6HS(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ uint32_t flags) noexcept; void D3DXEncodeBC7(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ uint32_t flags) noexcept; } // namespace