// Listing snapshot: 2020-09-25 13:10:09 +01:00 (332 lines, 12 KiB)

// BC.h
// Block-compression (BC) functionality
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#pragma once
#include <assert.h>
#include <DirectXMath.h>
#include <DirectXPackedVector.h>
namespace DirectX
// Macros
// Because these are used in SAL annotations, they need to remain macros rather than const values

// Number of texels in one compressed block. An 8-byte BC1 block at 4 bits per texel
// holds 16 texels. Guarded so an identical definition elsewhere does not conflict.
#ifndef NUM_PIXELS_PER_BLOCK
#define NUM_PIXELS_PER_BLOCK 16
#endif
// Constants
// Bit flags that tune the block encoders. Values can be OR-ed together and passed
// through the uint32_t 'flags' parameter of the D3DXEncodeBC* functions.
enum BC_FLAGS : uint32_t
{
    BC_FLAGS_NONE = 0x0,                    // No optional behavior requested
    BC_FLAGS_DITHER_RGB = 0x10000,          // Enables dithering for RGB colors for BC1-3
    BC_FLAGS_DITHER_A = 0x20000,            // Enables dithering for Alpha channel for BC1-3
    BC_FLAGS_UNIFORM = 0x40000,             // By default, uses perceptual weighting for BC1-3; this flag makes it a uniform weighting
    BC_FLAGS_USE_3SUBSETS = 0x80000,        // By default, BC7 skips mode 0 & 2; this flag adds those modes back
    BC_FLAGS_FORCE_BC7_MODE6 = 0x100000,    // BC7 should only use mode 6; skip other modes
};
// Structures
class LDRColorA;

// Four-channel floating-point color (r, g, b, a) with component-wise arithmetic.
//
// The default constructor leaves all four channels uninitialized. Copy construction
// and copy assignment are plain member-wise copies and are therefore defaulted.
// Conversions to and from LDRColorA are only declared here; their definitions live
// outside this class body.
class HDRColorA
{
public:
    float r, g, b, a;

    HDRColorA() = default;
    HDRColorA(float _r, float _g, float _b, float _a) noexcept : r(_r), g(_g), b(_b), a(_a) {}

    // Defaulted as a pair: a user-provided copy constructor without a matching copy
    // assignment relies on a deprecated implicit definition.
    HDRColorA(const HDRColorA&) = default;
    HDRColorA& operator = (const HDRColorA&) = default;

    // binary operators
    HDRColorA operator + (const HDRColorA& c) const noexcept
    {
        return HDRColorA(r + c.r, g + c.g, b + c.b, a + c.a);
    }

    HDRColorA operator - (const HDRColorA& c) const noexcept
    {
        return HDRColorA(r - c.r, g - c.g, b - c.b, a - c.a);
    }

    HDRColorA operator * (float f) const noexcept
    {
        return HDRColorA(r * f, g * f, b * f, a * f);
    }

    // Scales by the reciprocal of f (one divide, four multiplies). f is not checked for zero.
    HDRColorA operator / (float f) const noexcept
    {
        float fInv = 1.0f / f;
        return HDRColorA(r * fInv, g * fInv, b * fInv, a * fInv);
    }

    // Four-component dot product.
    float operator * (const HDRColorA& c) const noexcept
    {
        return r * c.r + g * c.g + b * c.b + a * c.a;
    }

    // assignment operators
    HDRColorA& operator += (const HDRColorA& c) noexcept
    {
        r += c.r;
        g += c.g;
        b += c.b;
        a += c.a;
        return *this;
    }

    HDRColorA& operator -= (const HDRColorA& c) noexcept
    {
        r -= c.r;
        g -= c.g;
        b -= c.b;
        a -= c.a;
        return *this;
    }

    HDRColorA& operator *= (float f) noexcept
    {
        r *= f;
        g *= f;
        b *= f;
        a *= f;
        return *this;
    }

    // Scales in place by the reciprocal of f. f is not checked for zero.
    HDRColorA& operator /= (float f) noexcept
    {
        float fInv = 1.0f / f;
        r *= fInv;
        g *= fInv;
        b *= fInv;
        a *= fInv;
        return *this;
    }

    // Clamps every channel to [fMin, fMax] in place and returns *this.
    HDRColorA& Clamp(_In_ float fMin, _In_ float fMax) noexcept
    {
        r = std::min<float>(fMax, std::max<float>(fMin, r));
        g = std::min<float>(fMax, std::max<float>(fMin, g));
        b = std::min<float>(fMax, std::max<float>(fMin, b));
        a = std::min<float>(fMax, std::max<float>(fMin, a));
        return *this;
    }

    // Conversions involving LDRColorA (declared only; defined elsewhere).
    HDRColorA(const LDRColorA& c) noexcept;
    HDRColorA& operator = (const LDRColorA& c) noexcept;
    LDRColorA ToLDRColorA() const noexcept;
};
// Linear interpolation between two colors: *pOut = *pC1 + s * (*pC2 - *pC1), per channel.
// s is not clamped, so values outside [0, 1] extrapolate.
// Each channel is read before the matching channel is written, so pOut may alias
// pC1 or pC2. Returns pOut.
inline HDRColorA* HDRColorALerp(_Out_ HDRColorA *pOut, _In_ const HDRColorA *pC1, _In_ const HDRColorA *pC2, _In_ float s) noexcept
{
    pOut->r = pC1->r + s * (pC2->r - pC1->r);
    pOut->g = pC1->g + s * (pC2->g - pC1->g);
    pOut->b = pC1->b + s * (pC2->b - pC1->b);
    pOut->a = pC1->a + s * (pC2->a - pC1->a);
    return pOut;
}
// In-memory layouts of the BC1-BC3 blocks. Packing is forced to 1 so the structs
// contain no padding between members.
#pragma pack(push,1)
// BC1/DXT1 compression (4 bits per texel)
struct D3DX_BC1
{
    uint16_t    rgb[2]; // 565 colors
    uint32_t    bitmap; // 2bpp rgb bitmap
};

// BC2/DXT2/3 compression (8 bits per texel)
struct D3DX_BC2
{
    uint32_t    bitmap[2];  // 4bpp alpha bitmap
    D3DX_BC1    bc1;        // BC1 rgb data
};

// BC3/DXT4/5 compression (8 bits per texel)
struct D3DX_BC3
{
    uint8_t     alpha[2];   // alpha values
    uint8_t     bitmap[6];  // 3bpp alpha bitmap
    D3DX_BC1    bc1;        // BC1 rgb data
};
#pragma pack(pop)

// The encoders/decoders in this header read and write 8-byte (BC1) and 16-byte
// (BC2/BC3) buffers, so fail the build if a layout ever drifts from those sizes.
static_assert(sizeof(D3DX_BC1) == 8, "D3DX_BC1 must be exactly 8 bytes");
static_assert(sizeof(D3DX_BC2) == 16, "D3DX_BC2 must be exactly 16 bytes");
static_assert(sizeof(D3DX_BC3) == 16, "D3DX_BC3 must be exactly 16 bytes");
// Templates
#pragma warning(push)
#pragma warning(disable : 4127)
// Picks a pair of interpolation endpoints for one block of scalar values by iteratively
// minimizing the sum-of-squares error between each value and its nearest palette step.
//
// Template parameter:
//   bRange  - true selects the value range [-1, 1]; false selects [0, 1].
// Parameters:
//   pX      - receives the low endpoint, clamped to the value range.
//   pY      - receives the high endpoint, clamped to the value range.
//   pPoints - NUM_PIXELS_PER_BLOCK input values.
//   cSteps  - palette size, 6 or 8. With 6, the range minimum and maximum are two extra
//             explicit palette entries; values snapped to them do not influence the fit.
//
// NOTE(review): this listing had lost its braces, the else/break statements, the body of
// the degenerate-range check and the final writes to pX/pY; they are restored here.
// Confirm against the canonical implementation.
template <bool bRange> void OptimizeAlpha(float *pX, float *pY, const float *pPoints, uint32_t cSteps) noexcept
{
    assert(pX && pY && pPoints);
    assert(cSteps == 6 || cSteps == 8); // the weight tables and pSteps[8] only cover these sizes

    // Interpolation weights: step i = pC[i] * low endpoint + pD[i] * high endpoint
    static const float pC6[] = { 5.0f / 5.0f, 4.0f / 5.0f, 3.0f / 5.0f, 2.0f / 5.0f, 1.0f / 5.0f, 0.0f / 5.0f };
    static const float pD6[] = { 0.0f / 5.0f, 1.0f / 5.0f, 2.0f / 5.0f, 3.0f / 5.0f, 4.0f / 5.0f, 5.0f / 5.0f };
    static const float pC8[] = { 7.0f / 7.0f, 6.0f / 7.0f, 5.0f / 7.0f, 4.0f / 7.0f, 3.0f / 7.0f, 2.0f / 7.0f, 1.0f / 7.0f, 0.0f / 7.0f };
    static const float pD8[] = { 0.0f / 7.0f, 1.0f / 7.0f, 2.0f / 7.0f, 3.0f / 7.0f, 4.0f / 7.0f, 5.0f / 7.0f, 6.0f / 7.0f, 7.0f / 7.0f };

    const float *pC = (6 == cSteps) ? pC6 : pC8;
    const float *pD = (6 == cSteps) ? pD6 : pD8;

    const float MAX_VALUE = 1.0f;
    const float MIN_VALUE = (bRange) ? -1.0f : 0.0f;

    // Find Min and Max points, as starting point
    float fX = MAX_VALUE;
    float fY = MIN_VALUE;

    if (8 == cSteps)
    {
        for (size_t iPoint = 0; iPoint < NUM_PIXELS_PER_BLOCK; iPoint++)
        {
            if (pPoints[iPoint] < fX)
                fX = pPoints[iPoint];

            if (pPoints[iPoint] > fY)
                fY = pPoints[iPoint];
        }
    }
    else
    {
        // Values sitting exactly on the range limits are covered by the two explicit
        // palette entries, so they are left out of the initial bounds.
        for (size_t iPoint = 0; iPoint < NUM_PIXELS_PER_BLOCK; iPoint++)
        {
            if (pPoints[iPoint] < fX && pPoints[iPoint] > MIN_VALUE)
                fX = pPoints[iPoint];

            if (pPoints[iPoint] > fY && pPoints[iPoint] < MAX_VALUE)
                fY = pPoints[iPoint];
        }

        // A single interior value gives a zero-width range; widen it to the maximum
        if (fX == fY)
        {
            fY = MAX_VALUE;
        }
    }

    // Use Newton's Method to find local minima of sum-of-squares error.
    auto fSteps = static_cast<float>(cSteps - 1);

    for (size_t iIteration = 0; iIteration < 8; iIteration++)
    {
        // Range too narrow (or inverted) to refine; also avoids dividing by ~0 below
        if ((fY - fX) < (1.0f / 256.0f))
            break;

        float fScale = fSteps / (fY - fX);

        // Calculate new steps
        float pSteps[8];

        for (size_t iStep = 0; iStep < cSteps; iStep++)
            pSteps[iStep] = pC[iStep] * fX + pD[iStep] * fY;

        if (6 == cSteps)
        {
            pSteps[6] = MIN_VALUE;
            pSteps[7] = MAX_VALUE;
        }

        // Evaluate function, and derivatives
        float dX = 0.0f;
        float dY = 0.0f;
        float d2X = 0.0f;
        float d2Y = 0.0f;

        for (size_t iPoint = 0; iPoint < NUM_PIXELS_PER_BLOCK; iPoint++)
        {
            float fDot = (pPoints[iPoint] - fX) * fScale;

            uint32_t iStep;
            if (fDot <= 0.0f)
            {
                // D3DX10 / D3DX11 didn't take into account the proper minimum value for the bRange (BC4S/BC5S) case
                iStep = ((6 == cSteps) && (pPoints[iPoint] <= (fX + MIN_VALUE) * 0.5f)) ? 6u : 0u;
            }
            else if (fDot >= fSteps)
            {
                iStep = ((6 == cSteps) && (pPoints[iPoint] >= (fY + MAX_VALUE) * 0.5f)) ? 7u : (cSteps - 1);
            }
            else
            {
                iStep = uint32_t(fDot + 0.5f);
            }

            // Indices 6 and 7 in 6-step mode are the fixed range limits; they do not
            // depend on the endpoints, so they contribute nothing to the derivatives.
            if (iStep < cSteps)
            {
                // D3DX had this computation backwards (pPoints[iPoint] - pSteps[iStep])
                // this fix improves RMS of the alpha component
                float fDiff = pSteps[iStep] - pPoints[iPoint];

                dX += pC[iStep] * fDiff;
                d2X += pC[iStep] * pC[iStep];

                dY += pD[iStep] * fDiff;
                d2Y += pD[iStep] * pD[iStep];
            }
        }

        // Move endpoints
        if (d2X > 0.0f)
            fX -= dX / d2X;

        if (d2Y > 0.0f)
            fY -= dY / d2Y;

        if (fX > fY)
        {
            float f = fX; fX = fY; fY = f;
        }

        // Converged: both gradients are small
        if ((dX * dX < (1.0f / 64.0f)) && (dY * dY < (1.0f / 64.0f)))
            break;
    }

    // Report the endpoints, clamped to the valid range
    *pX = (fX < MIN_VALUE) ? MIN_VALUE : (fX > MAX_VALUE) ? MAX_VALUE : fX;
    *pY = (fY < MIN_VALUE) ? MIN_VALUE : (fY > MAX_VALUE) ? MAX_VALUE : fY;
}
#pragma warning(pop)
// Functions
typedef void (*BC_DECODE)(XMVECTOR *pColor, const uint8_t *pBC);
typedef void (*BC_ENCODE)(uint8_t *pDXT, const XMVECTOR *pColor, uint32_t flags);
void D3DXDecodeBC1(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(8) const uint8_t *pBC) noexcept;
void D3DXDecodeBC2(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC) noexcept;
void D3DXDecodeBC3(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC) noexcept;
void D3DXDecodeBC4U(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(8) const uint8_t *pBC) noexcept;
void D3DXDecodeBC4S(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(8) const uint8_t *pBC) noexcept;
void D3DXDecodeBC5U(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC) noexcept;
void D3DXDecodeBC5S(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC) noexcept;
void D3DXDecodeBC6HU(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC) noexcept;
void D3DXDecodeBC6HS(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC) noexcept;
void D3DXDecodeBC7(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC) noexcept;
void D3DXEncodeBC1(_Out_writes_(8) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ float threshold, _In_ uint32_t flags) noexcept;
// BC1 requires one additional parameter, so it doesn't match signature of BC_ENCODE above
void D3DXEncodeBC2(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ uint32_t flags) noexcept;
void D3DXEncodeBC3(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ uint32_t flags) noexcept;
void D3DXEncodeBC4U(_Out_writes_(8) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ uint32_t flags) noexcept;
void D3DXEncodeBC4S(_Out_writes_(8) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ uint32_t flags) noexcept;
void D3DXEncodeBC5U(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ uint32_t flags) noexcept;
void D3DXEncodeBC5S(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ uint32_t flags) noexcept;
void D3DXEncodeBC6HU(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ uint32_t flags) noexcept;
void D3DXEncodeBC6HS(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ uint32_t flags) noexcept;
void D3DXEncodeBC7(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ uint32_t flags) noexcept;
} // namespace