diff --git a/src/utils/SkTextureCompressor_ASTC.cpp b/src/utils/SkTextureCompressor_ASTC.cpp index 8efffdfc9e..fbae8504e5 100644 --- a/src/utils/SkTextureCompressor_ASTC.cpp +++ b/src/utils/SkTextureCompressor_ASTC.cpp @@ -10,6 +10,7 @@ #include "SkBlitter.h" #include "SkEndian.h" +#include "SkMath.h" // This table contains the weight values for each texel. This is used in determining // how to convert a 12x12 grid of alpha values into a 6x5 grid of index values. Since @@ -260,11 +261,1742 @@ inline void CompressA8ASTCBlockVertical(uint8_t* dst, const uint8_t* src) { compress_a8_astc_block(&dst, src, 12); } +//////////////////////////////////////////////////////////////////////////////// +// +// ASTC Decoder +// +// Full details available in the spec: +// http://www.khronos.org/registry/gles/extensions/OES/OES_texture_compression_astc.txt +// +//////////////////////////////////////////////////////////////////////////////// + +// Enable this to assert whenever a decoded block has invalid ASTC values. Otherwise, +// each invalid block will result in a disgusting magenta color. +#define ASSERT_ASTC_DECODE_ERROR 0 + +// Reverse 64-bit integer taken from TAOCP 4a, although it's better +// documented at this site: +// http://matthewarcus.wordpress.com/2012/11/18/reversing-a-64-bit-word/ + +template +static inline T swap_bits(T p) { + T q = ((p>>k)^p) & m; + return p^q^(q<>1)&m0) | (n&m0)<<1; + n = swap_bits(n); + n = swap_bits(n); + n = swap_bits(n); + n = (n >> 34) | (n << 30); + return n; +} + +// An ASTC block is 128 bits. We represent it as two 64-bit integers in order +// to efficiently operate on the block using bitwise operations. +struct ASTCBlock { + uint64_t fLow; + uint64_t fHigh; + + // Reverses the bits of an ASTC block, making the LSB of the + // 128 bit block the MSB. + inline void reverse() { + const uint64_t newLow = reverse64(this->fHigh); + this->fHigh = reverse64(this->fLow); + this->fLow = newLow; + } +}; + +// Writes the given color to every pixel in the block. This is used by void-extent +// blocks (a special constant-color encoding of a block) and by the error function. +static inline void write_constant_color(uint8_t* dst, int blockDimX, int blockDimY, + int dstRowBytes, SkColor color) { + for (int y = 0; y < blockDimY; ++y) { + SkColor *dstColors = reinterpret_cast(dst); + for (int x = 0; x < blockDimX; ++x) { + dstColors[x] = color; + } + dst += dstRowBytes; + } +} + +// Sets the entire block to the ASTC "error" color, a disgusting magenta +// that's not supposed to appear in natural images. +static inline void write_error_color(uint8_t* dst, int blockDimX, int blockDimY, + int dstRowBytes) { + static const SkColor kASTCErrorColor = SkColorSetRGB(0xFF, 0, 0xFF); + +#if ASSERT_ASTC_DECODE_ERROR + SkDEBUGFAIL("ASTC decoding error!\n"); +#endif + + write_constant_color(dst, blockDimX, blockDimY, dstRowBytes, kASTCErrorColor); +} + +// Reads up to 64 bits of the ASTC block starting from bit +// 'from' and going up to but not including bit 'to'. 'from' starts +// counting from the LSB, counting up to the MSB. Returns -1 on +// error. +static uint64_t read_astc_bits(const ASTCBlock &block, int from, int to) { + SkASSERT(0 <= from && from <= 128); + SkASSERT(0 <= to && to <= 128); + + const int nBits = to - from; + if (0 == nBits) { + return 0; + } + + if (nBits < 0 || 64 <= nBits) { + SkDEBUGFAIL("ASTC -- shouldn't read more than 64 bits"); + return -1; + } + + // Remember, the 'to' bit isn't read. + uint64_t result = 0; + if (to <= 64) { + // All desired bits are in the low 64-bits. + result = (block.fLow >> from) & ((1ULL << nBits) - 1); + } else if (from >= 64) { + // All desired bits are in the high 64-bits. + result = (block.fHigh >> (from - 64)) & ((1ULL << nBits) - 1); + } else { + // from < 64 && to > 64 + SkASSERT(nBits > (64 - from)); + const int nLow = 64 - from; + const int nHigh = nBits - nLow; + result = + ((block.fLow >> from) & ((1ULL << nLow) - 1)) | + ((block.fHigh & ((1ULL << nHigh) - 1)) << nLow); + } + + return result; +} + +// Returns the number of bits needed to represent a number +// in the given power-of-two range (excluding the power of two itself). +static inline int bits_for_range(int x) { + SkASSERT(SkIsPow2(x)); + SkASSERT(0 != x); + // Since we know it's a power of two, there should only be one bit set, + // meaning the number of trailing zeros is 31 minus the number of leading + // zeros. + return 31 - SkCLZ(x); +} + +// Clamps an integer to the range [0, 255] +static inline int clamp_byte(int x) { + return SkClampMax(x, 255); +} + +// Helper function defined in the ASTC spec, section C.2.14 +// It transfers a few bits of precision from one value to another. +static inline void bit_transfer_signed(int *a, int *b) { + *b >>= 1; + *b |= *a & 0x80; + *a >>= 1; + *a &= 0x3F; + if ( (*a & 0x20) != 0 ) { + *a -= 0x40; + } +} + +// Helper function defined in the ASTC spec, section C.2.14 +// It uses the value in the blue channel to tint the red and green +static inline SkColor blue_contract(int a, int r, int g, int b) { + return SkColorSetARGB(a, (r + b) >> 1, (g + b) >> 1, b); +} + +// Helper function that decodes two colors from eight values. If isRGB is true, +// then the pointer 'v' contains six values and the last two are considered to be +// 0xFF. If isRGB is false, then all eight values come from the pointer 'v'. This +// corresponds to the decode procedure for the following endpoint modes: +// kLDR_RGB_Direct_ColorEndpointMode +// kLDR_RGBA_Direct_ColorEndpointMode +static inline void decode_rgba_direct(const int *v, SkColor *endpoints, bool isRGB) { + + int v6 = 0xFF; + int v7 = 0xFF; + if (!isRGB) { + v6 = v[6]; + v7 = v[7]; + } + + const int s0 = v[0] + v[2] + v[4]; + const int s1 = v[1] + v[3] + v[5]; + + if (s1 >= s0) { + endpoints[0] = SkColorSetARGB(v6, v[0], v[2], v[4]); + endpoints[1] = SkColorSetARGB(v7, v[1], v[3], v[5]); + } else { + endpoints[0] = blue_contract(v7, v[1], v[3], v[5]); + endpoints[1] = blue_contract(v6, v[0], v[2], v[4]); + } +} + +// Helper function that decodes two colors from six values. If isRGB is true, +// then the pointer 'v' contains four values and the last two are considered to be +// 0xFF. If isRGB is false, then all six values come from the pointer 'v'. This +// corresponds to the decode procedure for the following endpoint modes: +// kLDR_RGB_BaseScale_ColorEndpointMode +// kLDR_RGB_BaseScaleWithAlpha_ColorEndpointMode +static inline void decode_rgba_basescale(const int *v, SkColor *endpoints, bool isRGB) { + + int v4 = 0xFF; + int v5 = 0xFF; + if (!isRGB) { + v4 = v[4]; + v5 = v[5]; + } + + endpoints[0] = SkColorSetARGB(v4, + (v[0]*v[3]) >> 8, + (v[1]*v[3]) >> 8, + (v[2]*v[3]) >> 8); + endpoints[1] = SkColorSetARGB(v5, v[0], v[1], v[2]); +} + +// Helper function that decodes two colors from eight values. If isRGB is true, +// then the pointer 'v' contains six values and the last two are considered to be +// 0xFF. If isRGB is false, then all eight values come from the pointer 'v'. This +// corresponds to the decode procedure for the following endpoint modes: +// kLDR_RGB_BaseOffset_ColorEndpointMode +// kLDR_RGBA_BaseOffset_ColorEndpointMode +// +// If isRGB is true, then treat this as if v6 and v7 are meant to encode full alpha values. +static inline void decode_rgba_baseoffset(const int *v, SkColor *endpoints, bool isRGB) { + int v0 = v[0]; + int v1 = v[1]; + int v2 = v[2]; + int v3 = v[3]; + int v4 = v[4]; + int v5 = v[5]; + int v6 = isRGB ? 0xFF : v[6]; + // The 0 is here because this is an offset, not a direct value + int v7 = isRGB ? 0 : v[7]; + + bit_transfer_signed(&v1, &v0); + bit_transfer_signed(&v3, &v2); + bit_transfer_signed(&v5, &v4); + if (!isRGB) { + bit_transfer_signed(&v7, &v6); + } + + int c[2][4]; + if ((v1 + v3 + v5) >= 0) { + c[0][0] = v6; + c[0][1] = v0; + c[0][2] = v2; + c[0][3] = v4; + + c[1][0] = v6 + v7; + c[1][1] = v0 + v1; + c[1][2] = v2 + v3; + c[1][3] = v4 + v5; + } else { + c[0][0] = v6 + v7; + c[0][1] = (v0 + v1 + v4 + v5) >> 1; + c[0][2] = (v2 + v3 + v4 + v5) >> 1; + c[0][3] = v4 + v5; + + c[1][0] = v6; + c[1][1] = (v0 + v4) >> 1; + c[1][2] = (v2 + v4) >> 1; + c[1][3] = v4; + } + + endpoints[0] = SkColorSetARGB(clamp_byte(c[0][0]), + clamp_byte(c[0][1]), + clamp_byte(c[0][2]), + clamp_byte(c[0][3])); + + endpoints[1] = SkColorSetARGB(clamp_byte(c[1][0]), + clamp_byte(c[1][1]), + clamp_byte(c[1][2]), + clamp_byte(c[1][3])); +} + + +// A helper class used to decode bit values from standard integer values. +// We can't use this class with ASTCBlock because then it would need to +// handle multi-value ranges, and it's non-trivial to lookup a range of bits +// that splits across two different ints. +template +class SkTBits { +public: + SkTBits(const T val) : fVal(val) { } + + // Returns the bit at the given position + T operator [](const int idx) const { + return (fVal >> idx) & 1; + } + + // Returns the bits in the given range, inclusive + T operator ()(const int end, const int start) const { + SkASSERT(end >= start); + return (fVal >> start) & ((1ULL << ((end - start) + 1)) - 1); + } + +private: + const T fVal; +}; + +// This algorithm matches the trit block decoding in the spec (Table C.2.14) +static void decode_trit_block(int* dst, int nBits, const uint64_t &block) { + + SkTBits blockBits(block); + + // According to the spec, a trit block, which contains five values, + // has the following layout: + // + // 27 26 25 24 23 22 21 20 19 18 17 16 + // ----------------------------------------------- + // |T7 | m4 |T6 T5 | m3 |T4 | + // ----------------------------------------------- + // + // 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 + // -------------------------------------------------------------- + // | m2 |T3 T2 | m1 |T1 T0 | m0 | + // -------------------------------------------------------------- + // + // Where the m's are variable width depending on the number of bits used + // to encode the values (anywhere from 0 to 6). Since 3^5 = 243, the extra + // byte labeled T (whose bits are interleaved where 0 is the LSB and 7 is + // the MSB), contains five trit values. To decode the trit values, the spec + // says that we need to follow the following algorithm: + // + // if T[4:2] = 111 + // C = { T[7:5], T[1:0] }; t4 = t3 = 2 + // else + // C = T[4:0] + // + // if T[6:5] = 11 + // t4 = 2; t3 = T[7] + // else + // t4 = T[7]; t3 = T[6:5] + // + // if C[1:0] = 11 + // t2 = 2; t1 = C[4]; t0 = { C[3], C[2]&~C[3] } + // else if C[3:2] = 11 + // t2 = 2; t1 = 2; t0 = C[1:0] + // else + // t2 = C[4]; t1 = C[3:2]; t0 = { C[1], C[0]&~C[1] } + // + // The following C++ code is meant to mirror this layout and algorithm as + // closely as possible. + + int m[5]; + if (0 == nBits) { + memset(m, 0, sizeof(m)); + } else { + SkASSERT(nBits < 8); + m[0] = static_cast(blockBits(nBits - 1, 0)); + m[1] = static_cast(blockBits(2*nBits - 1 + 2, nBits + 2)); + m[2] = static_cast(blockBits(3*nBits - 1 + 4, 2*nBits + 4)); + m[3] = static_cast(blockBits(4*nBits - 1 + 5, 3*nBits + 5)); + m[4] = static_cast(blockBits(5*nBits - 1 + 7, 4*nBits + 7)); + } + + int T = + static_cast(blockBits(nBits + 1, nBits)) | + (static_cast(blockBits(2*nBits + 2 + 1, 2*nBits + 2)) << 2) | + (static_cast(blockBits[3*nBits + 4] << 4)) | + (static_cast(blockBits(4*nBits + 5 + 1, 4*nBits + 5)) << 5) | + (static_cast(blockBits[5*nBits + 7] << 7)); + + int t[5]; + + int C; + SkTBits Tbits(T); + if (0x7 == Tbits(4, 2)) { + C = (Tbits(7, 5) << 2) | Tbits(1, 0); + t[3] = t[4] = 2; + } else { + C = Tbits(4, 0); + if (Tbits(6, 5) == 0x3) { + t[4] = 2; t[3] = Tbits[7]; + } else { + t[4] = Tbits[7]; t[3] = Tbits(6, 5); + } + } + + SkTBits Cbits(C); + if (Cbits(1, 0) == 0x3) { + t[2] = 2; + t[1] = Cbits[4]; + t[0] = (Cbits[3] << 1) | (Cbits[2] & (0x1 & ~(Cbits[3]))); + } else if (Cbits(3, 2) == 0x3) { + t[2] = 2; + t[1] = 2; + t[0] = Cbits(1, 0); + } else { + t[2] = Cbits[4]; + t[1] = Cbits(3, 2); + t[0] = (Cbits[1] << 1) | (Cbits[0] & (0x1 & ~(Cbits[1]))); + } + +#ifdef SK_DEBUG + // Make sure all of the decoded values have a trit less than three + // and a bit value within the range of the allocated bits. + for (int i = 0; i < 5; ++i) { + SkASSERT(t[i] < 3); + SkASSERT(m[i] < (1 << nBits)); + } +#endif + + for (int i = 0; i < 5; ++i) { + *dst = (t[i] << nBits) + m[i]; + ++dst; + } +} + +// This algorithm matches the quint block decoding in the spec (Table C.2.15) +static void decode_quint_block(int* dst, int nBits, const uint64_t &block) { + SkTBits blockBits(block); + + // According to the spec, a quint block, which contains three values, + // has the following layout: + // + // + // 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 + // -------------------------------------------------------------------------- + // |Q6 Q5 | m2 |Q4 Q3 | m1 |Q2 Q1 Q0 | m0 | + // -------------------------------------------------------------------------- + // + // Where the m's are variable width depending on the number of bits used + // to encode the values (anywhere from 0 to 4). Since 5^3 = 125, the extra + // 7-bit value labeled Q (whose bits are interleaved where 0 is the LSB and 6 is + // the MSB), contains three quint values. To decode the quint values, the spec + // says that we need to follow the following algorithm: + // + // if Q[2:1] = 11 and Q[6:5] = 00 + // q2 = { Q[0], Q[4]&~Q[0], Q[3]&~Q[0] }; q1 = q0 = 4 + // else + // if Q[2:1] = 11 + // q2 = 4; C = { Q[4:3], ~Q[6:5], Q[0] } + // else + // q2 = T[6:5]; C = Q[4:0] + // + // if C[2:0] = 101 + // q1 = 4; q0 = C[4:3] + // else + // q1 = C[4:3]; q0 = C[2:0] + // + // The following C++ code is meant to mirror this layout and algorithm as + // closely as possible. + + int m[3]; + if (0 == nBits) { + memset(m, 0, sizeof(m)); + } else { + SkASSERT(nBits < 8); + m[0] = static_cast(blockBits(nBits - 1, 0)); + m[1] = static_cast(blockBits(2*nBits - 1 + 3, nBits + 3)); + m[2] = static_cast(blockBits(3*nBits - 1 + 5, 2*nBits + 5)); + } + + int Q = + static_cast(blockBits(nBits + 2, nBits)) | + (static_cast(blockBits(2*nBits + 3 + 1, 2*nBits + 3)) << 3) | + (static_cast(blockBits(3*nBits + 5 + 1, 3*nBits + 5)) << 5); + + int q[3]; + SkTBits Qbits(Q); // quantum? + + if (Qbits(2, 1) == 0x3 && Qbits(6, 5) == 0) { + const int notBitZero = (0x1 & ~(Qbits[0])); + q[2] = (Qbits[0] << 2) | ((Qbits[4] & notBitZero) << 1) | (Qbits[3] & notBitZero); + q[1] = 4; + q[0] = 4; + } else { + int C; + if (Qbits(2, 1) == 0x3) { + q[2] = 4; + C = (Qbits(4, 3) << 3) | ((0x3 & ~(Qbits(6, 5))) << 1) | Qbits[0]; + } else { + q[2] = Qbits(6, 5); + C = Qbits(4, 0); + } + + SkTBits Cbits(C); + if (Cbits(2, 0) == 0x5) { + q[1] = 4; + q[0] = Cbits(4, 3); + } else { + q[1] = Cbits(4, 3); + q[0] = Cbits(2, 0); + } + } + +#ifdef SK_DEBUG + for (int i = 0; i < 3; ++i) { + SkASSERT(q[i] < 5); + SkASSERT(m[i] < (1 << nBits)); + } +#endif + + for (int i = 0; i < 3; ++i) { + *dst = (q[i] << nBits) + m[i]; + ++dst; + } +} + +// Function that decodes a sequence of integers stored as an ISE (Integer +// Sequence Encoding) bit stream. The full details of this function are outlined +// in section C.2.12 of the ASTC spec. A brief overview is as follows: +// +// - Each integer in the sequence is bounded by a specific range r. +// - The range of each value determines the way the bit stream is interpreted, +// - If the range is a power of two, then the sequence is a sequence of bits +// - If the range is of the form 3*2^n, then the sequence is stored as a +// sequence of blocks, each block contains 5 trits and 5 bit sequences, which +// decodes into 5 values. +// - Similarly, if the range is of the form 5*2^n, then the sequence is stored as a +// sequence of blocks, each block contains 3 quints and 3 bit sequences, which +// decodes into 3 values. +static bool decode_integer_sequence( + int* dst, // The array holding the destination bits + int dstSize, // The maximum size of the array + int nVals, // The number of values that we'd like to decode + const ASTCBlock &block, // The block that we're decoding from + int startBit, // The bit from which we're going to do the reading + int endBit, // The bit at which we stop reading (not inclusive) + bool bReadForward, // If true, then read LSB -> MSB, else read MSB -> LSB + int nBits, // The number of bits representing this encoding + int nTrits, // The number of trits representing this encoding + int nQuints // The number of quints representing this encoding +) { + // If we want more values than we have, then fail. + if (nVals > dstSize) { + return false; + } + + ASTCBlock src = block; + + if (!bReadForward) { + src.reverse(); + startBit = 128 - startBit; + endBit = 128 - endBit; + } + + while (nVals > 0) { + + if (nTrits > 0) { + SkASSERT(0 == nQuints); + + int endBlockBit = startBit + 8 + 5*nBits; + if (endBlockBit > endBit) { + endBlockBit = endBit; + } + + decode_trit_block(dst, nBits, read_astc_bits(src, startBit, endBlockBit)); + dst += 5; + nVals -= 5; + startBit = endBlockBit; + + } else if (nQuints > 0) { + SkASSERT(0 == nTrits); + + int endBlockBit = startBit + 7 + 3*nBits; + if (endBlockBit > endBit) { + endBlockBit = endBit; + } + + decode_quint_block(dst, nBits, read_astc_bits(src, startBit, endBlockBit)); + dst += 3; + nVals -= 3; + startBit = endBlockBit; + + } else { + // Just read the bits, but don't read more than we have... + int endValBit = startBit + nBits; + if (endValBit > endBit) { + endValBit = endBit; + } + + SkASSERT(endValBit - startBit < 31); + *dst = static_cast(read_astc_bits(src, startBit, endValBit)); + ++dst; + --nVals; + startBit = endValBit; + } + } + + return true; +} + +// Helper function that unquantizes some (seemingly random) generated +// numbers... meant to match the ASTC hardware. This function is used +// to unquantize both colors (Table C.2.16) and weights (Table C.2.26) +static inline int unquantize_value(unsigned mask, int A, int B, int C, int D) { + int T = D * C + B; + T = T ^ A; + T = (A & mask) | (T >> 2); + SkASSERT(T < 256); + return T; +} + +// Helper function to replicate the bits in x that represents an oldPrec +// precision integer into a prec precision integer. For example: +// 255 == replicate_bits(7, 3, 8); +static inline int replicate_bits(int x, int oldPrec, int prec) { + while (oldPrec < prec) { + const int toShift = SkMin32(prec-oldPrec, oldPrec); + x = (x << toShift) | (x >> (oldPrec - toShift)); + oldPrec += toShift; + } + + // Make sure that no bits are set outside the desired precision. + SkASSERT((-(1 << prec) & x) == 0); + return x; +} + +// Returns the unquantized value of a color that's represented only as +// a set of bits. +static inline int unquantize_bits_color(int val, int nBits) { + return replicate_bits(val, nBits, 8); +} + +// Returns the unquantized value of a color that's represented as a +// trit followed by nBits bits. This algorithm follows the sequence +// defined in section C.2.13 of the ASTC spec. +static inline int unquantize_trit_color(int val, int nBits) { + SkASSERT(nBits > 0); + SkASSERT(nBits < 7); + + const int D = (val >> nBits) & 0x3; + SkASSERT(D < 3); + + const int A = -(val & 0x1) & 0x1FF; + + static const int Cvals[6] = { 204, 93, 44, 22, 11, 5 }; + const int C = Cvals[nBits - 1]; + + int B = 0; + const SkTBits valBits(val); + switch (nBits) { + case 1: + B = 0; + break; + + case 2: { + const int b = valBits[1]; + B = (b << 1) | (b << 2) | (b << 4) | (b << 8); + } + break; + + case 3: { + const int cb = valBits(2, 1); + B = cb | (cb << 2) | (cb << 7); + } + break; + + case 4: { + const int dcb = valBits(3, 1); + B = dcb | (dcb << 6); + } + break; + + case 5: { + const int edcb = valBits(4, 1); + B = (edcb << 5) | (edcb >> 2); + } + break; + + case 6: { + const int fedcb = valBits(5, 1); + B = (fedcb << 4) | (fedcb >> 4); + } + break; + } + + return unquantize_value(0x80, A, B, C, D); +} + +// Returns the unquantized value of a color that's represented as a +// quint followed by nBits bits. This algorithm follows the sequence +// defined in section C.2.13 of the ASTC spec. +static inline int unquantize_quint_color(int val, int nBits) { + const int D = (val >> nBits) & 0x7; + SkASSERT(D < 5); + + const int A = -(val & 0x1) & 0x1FF; + + static const int Cvals[5] = { 113, 54, 26, 13, 6 }; + SkASSERT(nBits > 0); + SkASSERT(nBits < 6); + + const int C = Cvals[nBits - 1]; + + int B = 0; + const SkTBits valBits(val); + switch (nBits) { + case 1: + B = 0; + break; + + case 2: { + const int b = valBits[1]; + B = (b << 2) | (b << 3) | (b << 8); + } + break; + + case 3: { + const int cb = valBits(2, 1); + B = (cb >> 1) | (cb << 1) | (cb << 7); + } + break; + + case 4: { + const int dcb = valBits(3, 1); + B = (dcb >> 1) | (dcb << 6); + } + break; + + case 5: { + const int edcb = valBits(4, 1); + B = (edcb << 5) | (edcb >> 3); + } + break; + } + + return unquantize_value(0x80, A, B, C, D); +} + +// This algorithm takes a list of integers, stored in vals, and unquantizes them +// in place. This follows the algorithm laid out in section C.2.13 of the ASTC spec. +static void unquantize_colors(int *vals, int nVals, int nBits, int nTrits, int nQuints) { + for (int i = 0; i < nVals; ++i) { + if (nTrits > 0) { + SkASSERT(nQuints == 0); + vals[i] = unquantize_trit_color(vals[i], nBits); + } else if (nQuints > 0) { + SkASSERT(nTrits == 0); + vals[i] = unquantize_quint_color(vals[i], nBits); + } else { + SkASSERT(nQuints == 0 && nTrits == 0); + vals[i] = unquantize_bits_color(vals[i], nBits); + } + } +} + +// Returns an interpolated value between c0 and c1 based on the weight. This +// follows the algorithm laid out in section C.2.19 of the ASTC spec. +static int interpolate_channel(int c0, int c1, int weight) { + SkASSERT(0 <= c0 && c0 < 256); + SkASSERT(0 <= c1 && c1 < 256); + + c0 = (c0 << 8) | c0; + c1 = (c1 << 8) | c1; + + const int result = ((c0*(64 - weight) + c1*weight + 32) / 64) >> 8; + + if (result > 255) { + return 255; + } + + SkASSERT(result >= 0); + return result; +} + +// Returns an interpolated color between the two endpoints based on the weight. +static SkColor interpolate_endpoints(const SkColor endpoints[2], int weight) { + return SkColorSetARGB( + interpolate_channel(SkColorGetA(endpoints[0]), SkColorGetA(endpoints[1]), weight), + interpolate_channel(SkColorGetR(endpoints[0]), SkColorGetR(endpoints[1]), weight), + interpolate_channel(SkColorGetG(endpoints[0]), SkColorGetG(endpoints[1]), weight), + interpolate_channel(SkColorGetB(endpoints[0]), SkColorGetB(endpoints[1]), weight)); +} + +// Returns an interpolated color between the two endpoints based on the weight. +// It uses separate weights for the channel depending on the value of the 'plane' +// variable. By default, all channels will use weight 0, and the value of plane +// means that weight1 will be used for: +// 0: red +// 1: green +// 2: blue +// 3: alpha +static SkColor interpolate_dual_endpoints( + const SkColor endpoints[2], int weight0, int weight1, int plane) { + int a = interpolate_channel(SkColorGetA(endpoints[0]), SkColorGetA(endpoints[1]), weight0); + int r = interpolate_channel(SkColorGetR(endpoints[0]), SkColorGetR(endpoints[1]), weight0); + int g = interpolate_channel(SkColorGetG(endpoints[0]), SkColorGetG(endpoints[1]), weight0); + int b = interpolate_channel(SkColorGetB(endpoints[0]), SkColorGetB(endpoints[1]), weight0); + + switch (plane) { + + case 0: + r = interpolate_channel( + SkColorGetR(endpoints[0]), SkColorGetR(endpoints[1]), weight1); + break; + + case 1: + g = interpolate_channel( + SkColorGetG(endpoints[0]), SkColorGetG(endpoints[1]), weight1); + break; + + case 2: + b = interpolate_channel( + SkColorGetB(endpoints[0]), SkColorGetB(endpoints[1]), weight1); + break; + + case 3: + a = interpolate_channel( + SkColorGetA(endpoints[0]), SkColorGetA(endpoints[1]), weight1); + break; + + default: + SkDEBUGFAIL("Plane should be 0-3"); + break; + } + + return SkColorSetARGB(a, r, g, b); +} + +// A struct of decoded values that we use to carry around information +// about the block. dimX and dimY are the dimension in texels of the block, +// for which there is only a limited subset of valid values: +// +// 4x4, 5x4, 5x5, 6x5, 6x6, 8x5, 8x6, 8x8, 10x5, 10x6, 10x8, 10x10, 12x10, 12x12 + +struct ASTCDecompressionData { + ASTCDecompressionData(int dimX, int dimY) : fDimX(dimX), fDimY(dimY) { } + const int fDimX; // the X dimension of the decompressed block + const int fDimY; // the Y dimension of the decompressed block + ASTCBlock fBlock; // the block data + int fBlockMode; // the block header that contains the block mode. + + bool fDualPlaneEnabled; // is this block compressing dual weight planes? + int fDualPlane; // the independent plane in dual plane mode. + + bool fVoidExtent; // is this block a single color? + bool fError; // does this block have an error encoding? + + int fWeightDimX; // the x dimension of the weight grid + int fWeightDimY; // the y dimension of the weight grid + + int fWeightBits; // the number of bits used for each weight value + int fWeightTrits; // the number of trits used for each weight value + int fWeightQuints; // the number of quints used for each weight value + + int fPartCount; // the number of partitions in this block + int fPartIndex; // the partition index: only relevant if fPartCount > 0 + + // CEM values can be anything in the range 0-15, and each corresponds to a different + // mode that represents the color data. We only support LDR modes. + enum ColorEndpointMode { + kLDR_Luminance_Direct_ColorEndpointMode = 0, + kLDR_Luminance_BaseOffset_ColorEndpointMode = 1, + kHDR_Luminance_LargeRange_ColorEndpointMode = 2, + kHDR_Luminance_SmallRange_ColorEndpointMode = 3, + kLDR_LuminanceAlpha_Direct_ColorEndpointMode = 4, + kLDR_LuminanceAlpha_BaseOffset_ColorEndpointMode = 5, + kLDR_RGB_BaseScale_ColorEndpointMode = 6, + kHDR_RGB_BaseScale_ColorEndpointMode = 7, + kLDR_RGB_Direct_ColorEndpointMode = 8, + kLDR_RGB_BaseOffset_ColorEndpointMode = 9, + kLDR_RGB_BaseScaleWithAlpha_ColorEndpointMode = 10, + kHDR_RGB_ColorEndpointMode = 11, + kLDR_RGBA_Direct_ColorEndpointMode = 12, + kLDR_RGBA_BaseOffset_ColorEndpointMode = 13, + kHDR_RGB_LDRAlpha_ColorEndpointMode = 14, + kHDR_RGB_HDRAlpha_ColorEndpointMode = 15 + }; + static const int kMaxColorEndpointModes = 16; + + // the color endpoint modes for this block. + static const int kMaxPartitions = 4; + ColorEndpointMode fCEM[kMaxPartitions]; + + int fColorStartBit; // The bit position of the first bit of the color data + int fColorEndBit; // The bit position of the last *possible* bit of the color data + + // Returns the number of partitions for this block. + int numPartitions() const { + return fPartCount; + } + + // Returns the total number of weight values that are stored in this block + int numWeights() const { + return fWeightDimX * fWeightDimY * (fDualPlaneEnabled ? 2 : 1); + } + +#ifdef SK_DEBUG + // Returns the maximum value that any weight can take. We really only use + // this function for debugging. + int maxWeightValue() const { + int maxVal = (1 << fWeightBits); + if (fWeightTrits > 0) { + SkASSERT(0 == fWeightQuints); + maxVal *= 3; + } else if (fWeightQuints > 0) { + SkASSERT(0 == fWeightTrits); + maxVal *= 5; + } + return maxVal - 1; + } +#endif + + // The number of bits needed to represent the texel weight data. This + // comes from the 'data size determination' section of the ASTC spec (C.2.22) + int numWeightBits() const { + const int nWeights = this->numWeights(); + return + ((nWeights*8*fWeightTrits + 4) / 5) + + ((nWeights*7*fWeightQuints + 2) / 3) + + (nWeights*fWeightBits); + } + + // Returns the number of color values stored in this block. The number of + // values stored is directly a function of the color endpoint modes. + int numColorValues() const { + int numValues = 0; + for (int i = 0; i < this->numPartitions(); ++i) { + int cemInt = static_cast(fCEM[i]); + numValues += ((cemInt >> 2) + 1) * 2; + } + + return numValues; + } + + // Figures out the number of bits available for color values, and fills + // in the maximum encoding that will fit the number of color values that + // we need. Returns false on error. (See section C.2.22 of the spec) + bool getColorValueEncoding(int *nBits, int *nTrits, int *nQuints) const { + if (NULL == nBits || NULL == nTrits || NULL == nQuints) { + return false; + } + + const int nColorVals = this->numColorValues(); + if (nColorVals <= 0) { + return false; + } + + const int colorBits = fColorEndBit - fColorStartBit; + SkASSERT(colorBits > 0); + + // This is the minimum amount of accuracy required by the spec. + if (colorBits < ((13 * nColorVals + 4) / 5)) { + return false; + } + + // Values can be represented as at most 8-bit values. + // !SPEED! place this in a lookup table based on colorBits and nColorVals + for (int i = 255; i > 0; --i) { + int range = i + 1; + int bits = 0, trits = 0, quints = 0; + bool valid = false; + if (SkIsPow2(range)) { + bits = bits_for_range(range); + valid = true; + } else if ((range % 3) == 0 && SkIsPow2(range/3)) { + trits = 1; + bits = bits_for_range(range/3); + valid = true; + } else if ((range % 5) == 0 && SkIsPow2(range/5)) { + quints = 1; + bits = bits_for_range(range/5); + valid = true; + } + + if (valid) { + const int actualColorBits = + ((nColorVals*8*trits + 4) / 5) + + ((nColorVals*7*quints + 2) / 3) + + (nColorVals*bits); + if (actualColorBits <= colorBits) { + *nTrits = trits; + *nQuints = quints; + *nBits = bits; + return true; + } + } + } + + return false; + } + + // Converts the sequence of color values into endpoints. The algorithm here + // corresponds to the values determined by section C.2.14 of the ASTC spec + void colorEndpoints(SkColor endpoints[4][2], const int* colorValues) const { + for (int i = 0; i < this->numPartitions(); ++i) { + switch (fCEM[i]) { + case kLDR_Luminance_Direct_ColorEndpointMode: { + const int* v = colorValues; + endpoints[i][0] = SkColorSetARGB(0xFF, v[0], v[0], v[0]); + endpoints[i][1] = SkColorSetARGB(0xFF, v[1], v[1], v[1]); + + colorValues += 2; + } + break; + + case kLDR_Luminance_BaseOffset_ColorEndpointMode: { + const int* v = colorValues; + const int L0 = (v[0] >> 2) | (v[1] & 0xC0); + const int L1 = clamp_byte(L0 + (v[1] & 0x3F)); + + endpoints[i][0] = SkColorSetARGB(0xFF, L0, L0, L0); + endpoints[i][1] = SkColorSetARGB(0xFF, L1, L1, L1); + + colorValues += 2; + } + break; + + case kLDR_LuminanceAlpha_Direct_ColorEndpointMode: { + const int* v = colorValues; + + endpoints[i][0] = SkColorSetARGB(v[2], v[0], v[0], v[0]); + endpoints[i][1] = SkColorSetARGB(v[3], v[1], v[1], v[1]); + + colorValues += 4; + } + break; + + case kLDR_LuminanceAlpha_BaseOffset_ColorEndpointMode: { + int v0 = colorValues[0]; + int v1 = colorValues[1]; + int v2 = colorValues[2]; + int v3 = colorValues[3]; + + bit_transfer_signed(&v1, &v0); + bit_transfer_signed(&v3, &v2); + + endpoints[i][0] = SkColorSetARGB(v2, v0, v0, v0); + endpoints[i][1] = SkColorSetARGB( + clamp_byte(v3+v2), + clamp_byte(v1+v0), + clamp_byte(v1+v0), + clamp_byte(v1+v0)); + + colorValues += 4; + } + break; + + case kLDR_RGB_BaseScale_ColorEndpointMode: { + decode_rgba_basescale(colorValues, endpoints[i], true); + colorValues += 4; + } + break; + + case kLDR_RGB_Direct_ColorEndpointMode: { + decode_rgba_direct(colorValues, endpoints[i], true); + colorValues += 6; + } + break; + + case kLDR_RGB_BaseOffset_ColorEndpointMode: { + decode_rgba_baseoffset(colorValues, endpoints[i], true); + colorValues += 6; + } + break; + + case kLDR_RGB_BaseScaleWithAlpha_ColorEndpointMode: { + decode_rgba_basescale(colorValues, endpoints[i], false); + colorValues += 6; + } + break; + + case kLDR_RGBA_Direct_ColorEndpointMode: { + decode_rgba_direct(colorValues, endpoints[i], false); + colorValues += 8; + } + break; + + case kLDR_RGBA_BaseOffset_ColorEndpointMode: { + decode_rgba_baseoffset(colorValues, endpoints[i], false); + colorValues += 8; + } + break; + + default: + SkDEBUGFAIL("HDR mode unsupported! This should be caught sooner."); + break; + } + } + } + + // Follows the procedure from section C.2.17 of the ASTC specification + int unquantizeWeight(int x) const { + SkASSERT(x <= this->maxWeightValue()); + + const int D = (x >> fWeightBits) & 0x7; + const int A = -(x & 0x1) & 0x7F; + + SkTBits xbits(x); + + int T = 0; + if (fWeightTrits > 0) { + SkASSERT(0 == fWeightQuints); + switch (fWeightBits) { + case 0: { + // x is a single trit + SkASSERT(x < 3); + + static const int kUnquantizationTable[3] = { 0, 32, 63 }; + T = kUnquantizationTable[x]; + } + break; + + case 1: { + const int B = 0; + const int C = 50; + T = unquantize_value(0x20, A, B, C, D); + } + break; + + case 2: { + const int b = xbits[1]; + const int B = b | (b << 2) | (b << 6); + const int C = 23; + T = unquantize_value(0x20, A, B, C, D); + } + break; + + case 3: { + const int cb = xbits(2, 1); + const int B = cb | (cb << 5); + const int C = 11; + T = unquantize_value(0x20, A, B, C, D); + } + break; + + default: + SkDEBUGFAIL("Too many bits for trit encoding"); + break; + } + + } else if (fWeightQuints > 0) { + SkASSERT(0 == fWeightTrits); + switch (fWeightBits) { + case 0: { + // x is a single quint + SkASSERT(x < 5); + + static const int kUnquantizationTable[5] = { 0, 16, 32, 47, 63 }; + T = kUnquantizationTable[x]; + } + break; + + case 1: { + const int B = 0; + const int C = 28; + T = unquantize_value(0x20, A, B, C, D); + } + break; + + case 2: { + const int b = xbits[1]; + const int B = (b << 1) | (b << 6); + const int C = 13; + T = unquantize_value(0x20, A, B, C, D); + } + break; + + default: + SkDEBUGFAIL("Too many bits for quint encoding"); + break; + } + } else { + SkASSERT(0 == fWeightTrits); + SkASSERT(0 == fWeightQuints); + + T = replicate_bits(x, fWeightBits, 6); + } + + // This should bring the value within [0, 63].. + SkASSERT(T <= 63); + + if (T > 32) { + T += 1; + } + + SkASSERT(T <= 64); + + return T; + } + + // Returns the weight at the associated index. If the index is out of bounds, it + // returns zero. It also chooses the weight appropriately based on the given dual + // plane. + int getWeight(const int* unquantizedWeights, int idx, bool dualPlane) const { + const int maxIdx = (fDualPlaneEnabled ? 2 : 1) * fWeightDimX * fWeightDimY - 1; + if (fDualPlaneEnabled) { + const int effectiveIdx = 2*idx + (dualPlane ? 1 : 0); + if (effectiveIdx > maxIdx) { + return 0; + } + return unquantizedWeights[effectiveIdx]; + } + + SkASSERT(!dualPlane); + + if (idx > maxIdx) { + return 0; + } else { + return unquantizedWeights[idx]; + } + } + + // This computes the effective weight at location (s, t) of the block. This + // weight is computed by sampling the texel weight grid (it's usually not 1-1), and + // then applying a bilerp. The algorithm outlined here follows the algorithm + // defined in section C.2.18 of the ASTC spec. + int infillWeight(const int* unquantizedValues, int s, int t, bool dualPlane) const { + const int Ds = (1024 + fDimX/2) / (fDimX - 1); + const int Dt = (1024 + fDimY/2) / (fDimY - 1); + + const int cs = Ds * s; + const int ct = Dt * t; + + const int gs = (cs*(fWeightDimX - 1) + 32) >> 6; + const int gt = (ct*(fWeightDimY - 1) + 32) >> 6; + + const int js = gs >> 4; + const int jt = gt >> 4; + + const int fs = gs & 0xF; + const int ft = gt & 0xF; + + const int idx = js + jt*fWeightDimX; + const int p00 = this->getWeight(unquantizedValues, idx, dualPlane); + const int p01 = this->getWeight(unquantizedValues, idx + 1, dualPlane); + const int p10 = this->getWeight(unquantizedValues, idx + fWeightDimX, dualPlane); + const int p11 = this->getWeight(unquantizedValues, idx + fWeightDimX + 1, dualPlane); + + const int w11 = (fs*ft + 8) >> 4; + const int w10 = ft - w11; + const int w01 = fs - w11; + const int w00 = 16 - fs - ft + w11; + + const int weight = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4; + SkASSERT(weight <= 64); + return weight; + } + + // Unquantizes the decoded texel weights as described in section C.2.17 of + // the ASTC specification. Additionally, it populates texelWeights with + // the expanded weight grid, which is computed according to section C.2.18 + void texelWeights(int texelWeights[2][12][12], const int* texelValues) const { + // Unquantized texel weights... + int unquantizedValues[144*2]; // 12x12 blocks with dual plane decoding... + SkASSERT(this->numWeights() <= 144*2); + + // Unquantize the weights and cache them + for (int j = 0; j < this->numWeights(); ++j) { + unquantizedValues[j] = this->unquantizeWeight(texelValues[j]); + } + + // Do weight infill... + for (int y = 0; y < fDimY; ++y) { + for (int x = 0; x < fDimX; ++x) { + texelWeights[0][x][y] = this->infillWeight(unquantizedValues, x, y, false); + if (fDualPlaneEnabled) { + texelWeights[1][x][y] = this->infillWeight(unquantizedValues, x, y, true); + } + } + } + } + + // Returns the partition for the texel located at position (x, y). + // Adapted from C.2.21 of the ASTC specification + int getPartition(int x, int y) const { + const int partitionCount = this->numPartitions(); + int seed = fPartIndex; + if ((fDimX * fDimY) < 31) { + x <<= 1; + y <<= 1; + } + + seed += (partitionCount - 1) * 1024; + + uint32_t p = seed; + p ^= p >> 15; p -= p << 17; p += p << 7; p += p << 4; + p ^= p >> 5; p += p << 16; p ^= p >> 7; p ^= p >> 3; + p ^= p << 6; p ^= p >> 17; + + uint32_t rnum = p; + uint8_t seed1 = rnum & 0xF; + uint8_t seed2 = (rnum >> 4) & 0xF; + uint8_t seed3 = (rnum >> 8) & 0xF; + uint8_t seed4 = (rnum >> 12) & 0xF; + uint8_t seed5 = (rnum >> 16) & 0xF; + uint8_t seed6 = (rnum >> 20) & 0xF; + uint8_t seed7 = (rnum >> 24) & 0xF; + uint8_t seed8 = (rnum >> 28) & 0xF; + uint8_t seed9 = (rnum >> 18) & 0xF; + uint8_t seed10 = (rnum >> 22) & 0xF; + uint8_t seed11 = (rnum >> 26) & 0xF; + uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF; + + seed1 *= seed1; seed2 *= seed2; + seed3 *= seed3; seed4 *= seed4; + seed5 *= seed5; seed6 *= seed6; + seed7 *= seed7; seed8 *= seed8; + seed9 *= seed9; seed10 *= seed10; + seed11 *= seed11; seed12 *= seed12; + + int sh1, sh2, sh3; + if (0 != (seed & 1)) { + sh1 = (0 != (seed & 2))? 4 : 5; + sh2 = (partitionCount == 3)? 6 : 5; + } else { + sh1 = (partitionCount==3)? 6 : 5; + sh2 = (0 != (seed & 2))? 4 : 5; + } + sh3 = (0 != (seed & 0x10))? sh1 : sh2; + + seed1 >>= sh1; seed2 >>= sh2; seed3 >>= sh1; seed4 >>= sh2; + seed5 >>= sh1; seed6 >>= sh2; seed7 >>= sh1; seed8 >>= sh2; + seed9 >>= sh3; seed10 >>= sh3; seed11 >>= sh3; seed12 >>= sh3; + + const int z = 0; + int a = seed1*x + seed2*y + seed11*z + (rnum >> 14); + int b = seed3*x + seed4*y + seed12*z + (rnum >> 10); + int c = seed5*x + seed6*y + seed9 *z + (rnum >> 6); + int d = seed7*x + seed8*y + seed10*z + (rnum >> 2); + + a &= 0x3F; + b &= 0x3F; + c &= 0x3F; + d &= 0x3F; + + if (partitionCount < 4) { + d = 0; + } + + if (partitionCount < 3) { + c = 0; + } + + if (a >= b && a >= c && a >= d) { + return 0; + } else if (b >= c && b >= d) { + return 1; + } else if (c >= d) { + return 2; + } else { + return 3; + } + } + + // Performs the proper interpolation of the texel based on the + // endpoints and weights. + SkColor getTexel(const SkColor endpoints[4][2], + const int weights[2][12][12], + int x, int y) const { + int part = 0; + if (this->numPartitions() > 1) { + part = this->getPartition(x, y); + } + + SkColor result; + if (fDualPlaneEnabled) { + result = interpolate_dual_endpoints( + endpoints[part], weights[0][x][y], weights[1][x][y], fDualPlane); + } else { + result = interpolate_endpoints(endpoints[part], weights[0][x][y]); + } + +#if 1 + // !FIXME! if we're writing directly to a bitmap, then we don't need + // to swap the red and blue channels, but since we're usually being used + // by the SkImageDecoder_astc module, the results are expected to be in RGBA. + result = SkColorSetARGB( + SkColorGetA(result), SkColorGetB(result), SkColorGetG(result), SkColorGetR(result)); +#endif + + return result; + } + + void decode() { + // First decode the block mode. + this->decodeBlockMode(); + + // Now we can decode the partition information. + fPartIndex = static_cast(read_astc_bits(fBlock, 11, 23)); + fPartCount = (fPartIndex & 0x3) + 1; + fPartIndex >>= 2; + + // This is illegal + if (fDualPlaneEnabled && this->numPartitions() == 4) { + fError = true; + return; + } + + // Based on the partition info, we can decode the color information. + this->decodeColorData(); + } + + // Decodes the dual plane based on the given bit location. The final + // location, if the dual plane is enabled, is also the end of our color data. + // This function is only meant to be used from this->decodeColorData() + void decodeDualPlane(int bitLoc) { + if (fDualPlaneEnabled) { + fDualPlane = static_cast(read_astc_bits(fBlock, bitLoc - 2, bitLoc)); + fColorEndBit = bitLoc - 2; + } else { + fColorEndBit = bitLoc; + } + } + + // Decodes the color information based on the ASTC spec. + void decodeColorData() { + + // By default, the last color bit is at the end of the texel weights + const int lastWeight = 128 - this->numWeightBits(); + + // If we have a dual plane then it will be at this location, too. + int dualPlaneBitLoc = lastWeight; + + // If there's only one partition, then our job is (relatively) easy. + if (this->numPartitions() == 1) { + fCEM[0] = static_cast(read_astc_bits(fBlock, 13, 17)); + fColorStartBit = 17; + + // Handle dual plane mode... + this->decodeDualPlane(dualPlaneBitLoc); + + return; + } + + // If we have more than one partition, then we need to make + // room for the partition index. + fColorStartBit = 29; + + // Read the base CEM. If it's zero, then we have no additional + // CEM data and the endpoints for each partition share the same CEM. + const int baseCEM = static_cast(read_astc_bits(fBlock, 23, 25)); + if (0 == baseCEM) { + + const ColorEndpointMode sameCEM = + static_cast(read_astc_bits(fBlock, 25, 29)); + + for (int i = 0; i < kMaxPartitions; ++i) { + fCEM[i] = sameCEM; + } + + // Handle dual plane mode... + this->decodeDualPlane(dualPlaneBitLoc); + + return; + } + + // Move the dual plane selector bits down based on how many + // partitions the block contains. + switch (this->numPartitions()) { + case 2: + dualPlaneBitLoc -= 2; + break; + + case 3: + dualPlaneBitLoc -= 5; + break; + + case 4: + dualPlaneBitLoc -= 8; + break; + + default: + SkDEBUGFAIL("Internal ASTC decoding error."); + break; + } + + // The rest of the CEM config will be between the dual plane bit selector + // and the texel weight grid. + const int lowCEM = static_cast(read_astc_bits(fBlock, 23, 29)); + SkASSERT(lastWeight - dualPlaneBitLoc > 31); + int fullCEM = static_cast(read_astc_bits(fBlock, dualPlaneBitLoc, lastWeight)); + + // Attach the config at the end of the weight grid to the CEM values + // in the beginning of the block. + fullCEM = (fullCEM << 6) | lowCEM; + + // Ignore the two least significant bits, since those are our baseCEM above. + fullCEM = fullCEM >> 2; + + int C[kMaxPartitions]; // Next, decode C and M from the spec (Table C.2.12) + for (int i = 0; i < this->numPartitions(); ++i) { + C[i] = fullCEM & 1; + fullCEM = fullCEM >> 1; + } + + int M[kMaxPartitions]; + for (int i = 0; i < this->numPartitions(); ++i) { + M[i] = fullCEM & 0x3; + fullCEM = fullCEM >> 2; + } + + // Construct our CEMs.. + SkASSERT(baseCEM > 0); + for (int i = 0; i < this->numPartitions(); ++i) { + int cem = (baseCEM - 1) * 4; + cem += (0 == C[i])? 0 : 4; + cem += M[i]; + + SkASSERT(cem < 16); + fCEM[i] = static_cast(cem); + } + + // Finally, if we have dual plane mode, then read the plane selector. + this->decodeDualPlane(dualPlaneBitLoc); + } + + // Decodes the block mode. This function determines whether or not we use + // dual plane encoding, the size of the texel weight grid, and the number of + // bits, trits and quints that are used to encode it. For more information, + // see section C.2.10 of the ASTC spec. + // + // For 2D blocks, the Block Mode field is laid out as follows: + // + // ------------------------------------------------------------------------- + // 10 9 8 7 6 5 4 3 2 1 0 Width Height Notes + // ------------------------------------------------------------------------- + // D H B A R0 0 0 R2 R1 B+4 A+2 + // D H B A R0 0 1 R2 R1 B+8 A+2 + // D H B A R0 1 0 R2 R1 A+2 B+8 + // D H 0 B A R0 1 1 R2 R1 A+2 B+6 + // D H 1 B A R0 1 1 R2 R1 B+2 A+2 + // D H 0 0 A R0 R2 R1 0 0 12 A+2 + // D H 0 1 A R0 R2 R1 0 0 A+2 12 + // D H 1 1 0 0 R0 R2 R1 0 0 6 10 + // D H 1 1 0 1 R0 R2 R1 0 0 10 6 + // B 1 0 A R0 R2 R1 0 0 A+6 B+6 D=0, H=0 + // x x 1 1 1 1 1 1 1 0 0 - - Void-extent + // x x 1 1 1 x x x x 0 0 - - Reserved* + // x x x x x x x 0 0 0 0 - - Reserved + // ------------------------------------------------------------------------- + // + // D - dual plane enabled + // H, R - used to determine the number of bits/trits/quints in texel weight encoding + // R is a three bit value whose LSB is R0 and MSB is R1 + // Width, Height - dimensions of the texel weight grid (determined by A and B) + + void decodeBlockMode() { + const int blockMode = static_cast(read_astc_bits(fBlock, 0, 11)); + + // Check for special void extent encoding + fVoidExtent = (blockMode & 0x1FF) == 0x1FC; + + // Check for reserved block modes + fError = ((blockMode & 0x1C3) == 0x1C0) || ((blockMode & 0xF) == 0); + + // Neither reserved nor void-extent, decode as usual + // This code corresponds to table C.2.8 of the ASTC spec + bool highPrecision = false; + int R = 0; + if ((blockMode & 0x3) == 0) { + R = ((0xC & blockMode) >> 1) | ((0x10 & blockMode) >> 4); + const int bitsSevenAndEight = (blockMode & 0x180) >> 7; + SkASSERT(0 <= bitsSevenAndEight && bitsSevenAndEight < 4); + + const int A = (blockMode >> 5) & 0x3; + const int B = (blockMode >> 9) & 0x3; + + fDualPlaneEnabled = (blockMode >> 10) & 0x1; + highPrecision = (blockMode >> 9) & 0x1; + + switch (bitsSevenAndEight) { + default: + case 0: + fWeightDimX = 12; + fWeightDimY = A + 2; + break; + + case 1: + fWeightDimX = A + 2; + fWeightDimY = 12; + break; + + case 2: + fWeightDimX = A + 6; + fWeightDimY = B + 6; + fDualPlaneEnabled = false; + highPrecision = false; + break; + + case 3: + if (0 == A) { + fWeightDimX = 6; + fWeightDimY = 10; + } else { + fWeightDimX = 10; + fWeightDimY = 6; + } + break; + } + } else { // (blockMode & 0x3) != 0 + R = ((blockMode & 0x3) << 1) | ((blockMode & 0x10) >> 4); + + const int bitsTwoAndThree = (blockMode >> 2) & 0x3; + SkASSERT(0 <= bitsTwoAndThree && bitsTwoAndThree < 4); + + const int A = (blockMode >> 5) & 0x3; + const int B = (blockMode >> 7) & 0x3; + + fDualPlaneEnabled = (blockMode >> 10) & 0x1; + highPrecision = (blockMode >> 9) & 0x1; + + switch (bitsTwoAndThree) { + case 0: + fWeightDimX = B + 4; + fWeightDimY = A + 2; + break; + case 1: + fWeightDimX = B + 8; + fWeightDimY = A + 2; + break; + case 2: + fWeightDimX = A + 2; + fWeightDimY = B + 8; + break; + case 3: + if ((B & 0x2) == 0) { + fWeightDimX = A + 2; + fWeightDimY = (B & 1) + 6; + } else { + fWeightDimX = (B & 1) + 2; + fWeightDimY = A + 2; + } + break; + } + } + + // We should have set the values of R and highPrecision + // from decoding the block mode, these are used to determine + // the proper dimensions of our weight grid. + if ((R & 0x6) == 0) { + fError = true; + } else { + static const int kBitAllocationTable[2][6][3] = { + { + { 1, 0, 0 }, + { 0, 1, 0 }, + { 2, 0, 0 }, + { 0, 0, 1 }, + { 1, 1, 0 }, + { 3, 0, 0 } + }, + { + { 1, 0, 1 }, + { 2, 1, 0 }, + { 4, 0, 0 }, + { 2, 0, 1 }, + { 3, 1, 0 }, + { 5, 0, 0 } + } + }; + + fWeightBits = kBitAllocationTable[highPrecision][R - 2][0]; + fWeightTrits = kBitAllocationTable[highPrecision][R - 2][1]; + fWeightQuints = kBitAllocationTable[highPrecision][R - 2][2]; + } + } +}; + +// Reads an ASTC block from the given pointer. +static inline void read_astc_block(ASTCDecompressionData *dst, const uint8_t* src) { + const uint64_t* qword = reinterpret_cast(src); + dst->fBlock.fLow = SkEndian_SwapLE64(qword[0]); + dst->fBlock.fHigh = SkEndian_SwapLE64(qword[1]); + dst->decode(); +} + +// Take a known void-extent block, and write out the values as a constant color. +static void decompress_void_extent(uint8_t* dst, int dstRowBytes, + const ASTCDecompressionData &data) { + // The top 64 bits contain 4 16-bit RGBA values. + int a = (static_cast(read_astc_bits(data.fBlock, 112, 128)) + 255) >> 8; + int b = (static_cast(read_astc_bits(data.fBlock, 96, 112)) + 255) >> 8; + int g = (static_cast(read_astc_bits(data.fBlock, 80, 96)) + 255) >> 8; + int r = (static_cast(read_astc_bits(data.fBlock, 64, 80)) + 255) >> 8; + + write_constant_color(dst, data.fDimX, data.fDimY, dstRowBytes, SkColorSetARGB(a, r, g, b)); +} + +// Decompresses a single ASTC block. It's assumed that data.fDimX and data.fDimY are +// set and that the block has already been decoded (i.e. data.decode() has been called) +static void decompress_astc_block(uint8_t* dst, int dstRowBytes, + const ASTCDecompressionData &data) { + if (data.fError) { + write_error_color(dst, data.fDimX, data.fDimY, dstRowBytes); + return; + } + + if (data.fVoidExtent) { + decompress_void_extent(dst, dstRowBytes, data); + return; + } + + // According to the spec, any more than 64 values is illegal. (C.2.24) + static const int kMaxTexelValues = 64; + + // Decode the texel weights. + int texelValues[kMaxTexelValues]; + bool success = decode_integer_sequence( + texelValues, kMaxTexelValues, data.numWeights(), + // texel data goes to the end of the 128 bit block. + data.fBlock, 128, 128 - data.numWeightBits(), false, + data.fWeightBits, data.fWeightTrits, data.fWeightQuints); + + if (!success) { + write_error_color(dst, data.fDimX, data.fDimY, dstRowBytes); + return; + } + + // Decode the color endpoints + int colorBits, colorTrits, colorQuints; + if (!data.getColorValueEncoding(&colorBits, &colorTrits, &colorQuints)) { + write_error_color(dst, data.fDimX, data.fDimY, dstRowBytes); + return; + } + + // According to the spec, any more than 18 color values is illegal. (C.2.24) + static const int kMaxColorValues = 18; + + int colorValues[kMaxColorValues]; + success = decode_integer_sequence( + colorValues, kMaxColorValues, data.numColorValues(), + data.fBlock, data.fColorStartBit, data.fColorEndBit, true, + colorBits, colorTrits, colorQuints); + + if (!success) { + write_error_color(dst, data.fDimX, data.fDimY, dstRowBytes); + return; + } + + // Unquantize the color values after they've been decoded. + unquantize_colors(colorValues, data.numColorValues(), colorBits, colorTrits, colorQuints); + + // Decode the colors into the appropriate endpoints. + SkColor endpoints[4][2]; + data.colorEndpoints(endpoints, colorValues); + + // Do texel infill and decode the texel values. + int texelWeights[2][12][12]; + data.texelWeights(texelWeights, texelValues); + + // Write the texels by interpolating them based on the information + // stored in the block. + dst += data.fDimY * dstRowBytes; + for (int y = 0; y < data.fDimY; ++y) { + dst -= dstRowBytes; + SkColor* colorPtr = reinterpret_cast(dst); + for (int x = 0; x < data.fDimX; ++x) { + colorPtr[x] = data.getTexel(endpoints, texelWeights, x, y); + } + } +} + //////////////////////////////////////////////////////////////////////////////// namespace SkTextureCompressor { -bool CompressA8To12x12ASTC(uint8_t* dst, const uint8_t* src, int width, int height, int rowBytes) { +bool CompressA8To12x12ASTC(uint8_t* dst, const uint8_t* src, + int width, int height, int rowBytes) { if (width < 0 || ((width % 12) != 0) || height < 0 || ((height % 12) != 0)) { return false; } @@ -285,4 +2017,25 @@ SkBlitter* CreateASTCBlitter(int width, int height, void* outputBuffer) { (width, height, outputBuffer); } +void DecompressASTC(uint8_t* dst, int dstRowBytes, const uint8_t* src, + int width, int height, int blockDimX, int blockDimY) { + // ASTC is encoded in what they call "raster order", so that the first + // block is the bottom-left block in the image, and the first pixel + // is the bottom-left pixel of the image + dst += height * dstRowBytes; + + ASTCDecompressionData data(blockDimX, blockDimY); + for (int y = 0; y < height; y += blockDimY) { + dst -= blockDimY * dstRowBytes; + SkColor *colorPtr = reinterpret_cast(dst); + for (int x = 0; x < width; x += blockDimX) { + read_astc_block(&data, src); + decompress_astc_block(reinterpret_cast(colorPtr + x), dstRowBytes, data); + + // ASTC encoded blocks are 16 bytes (128 bits) large. + src += 16; + } + } +} + } // SkTextureCompressor diff --git a/src/utils/SkTextureCompressor_ASTC.h b/src/utils/SkTextureCompressor_ASTC.h index 152fc62f16..57ba08d440 100644 --- a/src/utils/SkTextureCompressor_ASTC.h +++ b/src/utils/SkTextureCompressor_ASTC.h @@ -19,6 +19,9 @@ namespace SkTextureCompressor { int width, int height, int rowBytes); SkBlitter* CreateASTCBlitter(int width, int height, void* outputBuffer); + + void DecompressASTC(uint8_t* dst, int dstRowBytes, const uint8_t* src, + int width, int height, int blockDimX, int blockDimY); } #endif // SkTextureCompressor_ASTC_DEFINED