Reland "Respect full precision for RGB16 PNGs" (part 2)

This lands all the new xform hooks but no change to src/codec.
So the new decode features are turned off.

I'm relanding this in pieces to try to bisect a
strange MSAN error.

Original CL:
https://skia-review.googlesource.com/c/7085/

BUG=skia:

CQ_INCLUDE_TRYBOTS=skia.primary:Test-Ubuntu-Clang-GCE-CPU-AVX2-x86_64-Debug-MSAN,Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD,Build-Ubuntu-Clang-x86_64-Release-Fast


Change-Id: I451a2a29c73ca475e9e7a5ded58d4948d6b8be19
Reviewed-on: https://skia-review.googlesource.com/7277
Reviewed-by: Matt Sarett <msarett@google.com>
Commit-Queue: Matt Sarett <msarett@google.com>
This commit is contained in:
Matt Sarett 2017-01-19 12:04:32 -05:00 committed by Skia Commit-Bot
parent f720098671
commit 5bee0b6de6
8 changed files with 129 additions and 4 deletions

View File

@ -29,6 +29,7 @@ public:
kBGRA_8888_ColorFormat,
// Unsigned, big-endian, 16-bit integer
kRGB_U16_BE_ColorFormat, // Src only
kRGBA_U16_BE_ColorFormat, // Src only
kRGBA_F16_ColorFormat, // Dst only

View File

@ -1104,7 +1104,10 @@ bool SkColorSpaceXform_XYZ<kCSM>
}
}
if (kRGBA_F32_ColorFormat == dstColorFormat || kRGBA_U16_BE_ColorFormat == srcColorFormat) {
if (kRGBA_F32_ColorFormat == dstColorFormat ||
kRGBA_U16_BE_ColorFormat == srcColorFormat ||
kRGB_U16_BE_ColorFormat == srcColorFormat)
{
return this->applyPipeline(dstColorFormat, dst, srcColorFormat, src, len, alphaType);
}
@ -1221,6 +1224,24 @@ bool SkColorSpaceXform_XYZ<kCSM>
break;
}
break;
case kRGB_U16_BE_ColorFormat:
switch (fSrcGamma) {
case kLinear_SrcGamma:
pipeline.append(SkRasterPipeline::load_rgb_u16_be, &src);
break;
case kSRGB_SrcGamma:
pipeline.append(SkRasterPipeline::load_rgb_u16_be, &src);
pipeline.append_from_srgb(kUnpremul_SkAlphaType);
break;
case kTable_SrcGamma:
loadTables.fSrc = src;
loadTables.fR = fSrcGammaTables[0];
loadTables.fG = fSrcGammaTables[1];
loadTables.fB = fSrcGammaTables[2];
pipeline.append(SkRasterPipeline::load_tables_rgb_u16_be, &loadTables);
break;
}
break;
default:
return false;
}

View File

@ -31,6 +31,9 @@ bool SkColorSpaceXform_A2B::onApply(ColorFormat dstFormat, void* dst, ColorForma
case kRGBA_U16_BE_ColorFormat:
pipeline.append(SkRasterPipeline::load_u16_be, &src);
break;
case kRGB_U16_BE_ColorFormat:
pipeline.append(SkRasterPipeline::load_rgb_u16_be, &src);
break;
default:
SkCSXformPrintf("F16/F32 source color format not supported\n");
return false;

View File

@ -68,6 +68,16 @@ struct SkNx {
*c = SkNx{cl, ch};
*d = SkNx{dl, dh};
}
AI static void Load3(const void* vptr, SkNx* a, SkNx* b, SkNx* c) {
auto ptr = (const char*)vptr;
Half al, bl, cl,
ah, bh, ch;
Half::Load3(ptr , &al, &bl, &cl);
Half::Load3(ptr + 3*N/2*sizeof(T), &ah, &bh, &ch);
*a = SkNx{al, ah};
*b = SkNx{bl, bh};
*c = SkNx{cl, ch};
}
AI static void Store4(void* vptr, const SkNx& a, const SkNx& b, const SkNx& c, const SkNx& d) {
auto ptr = (char*)vptr;
Half::Store4(ptr, a.fLo, b.fLo, c.fLo, d.fLo);
@ -149,6 +159,12 @@ struct SkNx<1,T> {
*c = Load(ptr + 2*sizeof(T));
*d = Load(ptr + 3*sizeof(T));
}
AI static void Load3(const void* vptr, SkNx* a, SkNx* b, SkNx* c) {
auto ptr = (const char*)vptr;
*a = Load(ptr + 0*sizeof(T));
*b = Load(ptr + 1*sizeof(T));
*c = Load(ptr + 2*sizeof(T));
}
AI static void Store4(void* vptr, const SkNx& a, const SkNx& b, const SkNx& c, const SkNx& d) {
auto ptr = (char*)vptr;
a.store(ptr + 0*sizeof(T));

View File

@ -69,8 +69,9 @@
M(load_565) M(store_565) \
M(load_f16) M(store_f16) \
M(load_8888) M(store_8888) \
M(load_u16_be) \
M(load_tables) M(load_tables_u16_be) M(store_tables) \
M(load_u16_be) M(load_rgb_u16_be) \
M(load_tables_u16_be) M(load_tables_rgb_u16_be) \
M(load_tables) M(store_tables) \
M(scale_u8) M(scale_1_float) \
M(lerp_u8) M(lerp_565) M(lerp_1_float) \
M(dstatop) M(dstin) M(dstout) M(dstover) \

View File

@ -241,7 +241,12 @@ public:
*b = rgba.val[2];
*a = rgba.val[3];
}
AI static void Load3(const void* ptr, SkNx* r, SkNx* g, SkNx* b) {
uint16x4x3_t rgba = vld3_u16((const uint16_t*)ptr);
*r = rgba.val[0];
*g = rgba.val[1];
*b = rgba.val[2];
}
AI static void Store4(void* dst, const SkNx& r, const SkNx& g, const SkNx& b, const SkNx& a) {
uint16x4x4_t rgba = {{
r.fVec,

View File

@ -271,6 +271,22 @@ public:
*b = ba;
*a = _mm_srli_si128(ba, 8);
}
AI static void Load3(const void* ptr, SkNx* r, SkNx* g, SkNx* b) {
// The idea here is to get 4 vectors that are R G B _ _ _ _ _.
// The second load is at a funny location to make sure we don't read past
// the bounds of memory. This is fine, we just need to shift it a little bit.
const uint8_t* ptr8 = (const uint8_t*) ptr;
__m128i rgb0 = _mm_loadu_si128((const __m128i*) (ptr8 + 0));
__m128i rgb1 = _mm_srli_si128(rgb0, 3*2);
__m128i rgb2 = _mm_srli_si128(_mm_loadu_si128((const __m128i*) (ptr8 + 4*2)), 2*2);
__m128i rgb3 = _mm_srli_si128(rgb2, 3*2);
__m128i rrggbb01 = _mm_unpacklo_epi16(rgb0, rgb1);
__m128i rrggbb23 = _mm_unpacklo_epi16(rgb2, rgb3);
*r = _mm_unpacklo_epi32(rrggbb01, rrggbb23);
*g = _mm_srli_si128(r->fVec, 4*2);
*b = _mm_unpackhi_epi32(rrggbb01, rrggbb23);
}
AI static void Store4(void* dst, const SkNx& r, const SkNx& g, const SkNx& b, const SkNx& a) {
__m128i rg = _mm_unpacklo_epi16(r.fVec, g.fVec);
__m128i ba = _mm_unpacklo_epi16(b.fVec, a.fVec);
@ -334,6 +350,32 @@ public:
*b = _mm_unpacklo_epi64(ba0123, ba4567);
*a = _mm_unpackhi_epi64(ba0123, ba4567);
}
AI static void Load3(const void* ptr, SkNx* r, SkNx* g, SkNx* b) {
// TODO: AVX2 version
const uint8_t* ptr8 = (const uint8_t*) ptr;
__m128i rgb0 = _mm_loadu_si128((const __m128i*) (ptr8 + 0*2));
__m128i rgb1 = _mm_srli_si128(rgb0, 3*2);
__m128i rgb2 = _mm_loadu_si128((const __m128i*) (ptr8 + 6*2));
__m128i rgb3 = _mm_srli_si128(rgb2, 3*2);
__m128i rgb4 = _mm_loadu_si128((const __m128i*) (ptr8 + 12*2));
__m128i rgb5 = _mm_srli_si128(rgb4, 3*2);
__m128i rgb6 = _mm_srli_si128(_mm_loadu_si128((const __m128i*) (ptr8 + 16*2)), 2*2);
__m128i rgb7 = _mm_srli_si128(rgb6, 3*2);
__m128i rgb01 = _mm_unpacklo_epi16(rgb0, rgb1);
__m128i rgb23 = _mm_unpacklo_epi16(rgb2, rgb3);
__m128i rgb45 = _mm_unpacklo_epi16(rgb4, rgb5);
__m128i rgb67 = _mm_unpacklo_epi16(rgb6, rgb7);
__m128i rg03 = _mm_unpacklo_epi32(rgb01, rgb23);
__m128i bx03 = _mm_unpackhi_epi32(rgb01, rgb23);
__m128i rg47 = _mm_unpacklo_epi32(rgb45, rgb67);
__m128i bx47 = _mm_unpackhi_epi32(rgb45, rgb67);
*r = _mm_unpacklo_epi64(rg03, rg47);
*g = _mm_unpackhi_epi64(rg03, rg47);
*b = _mm_unpacklo_epi64(bx03, bx47);
}
AI static void Store4(void* ptr, const SkNx& r, const SkNx& g, const SkNx& b, const SkNx& a) {
// TODO: AVX2 version
__m128i rg0123 = _mm_unpacklo_epi16(r.fVec, g.fVec), // r0 g0 r1 g1 r2 g2 r3 g3

View File

@ -591,6 +591,23 @@ STAGE_CTX(load_u16_be, const uint64_t**) {
a = (1.0f / 65535.0f) * SkNx_cast<float>((ah << 8) | (ah >> 8));
}
STAGE_CTX(load_rgb_u16_be, const uint16_t**) {
auto ptr = *ctx + 3*x;
const void* src = ptr;
uint16_t buf[N*3] = {0};
if (tail) {
memcpy(buf, src, tail*3*sizeof(uint16_t));
src = buf;
}
SkNh rh, gh, bh;
SkNh::Load3(src, &rh, &gh, &bh);
r = (1.0f / 65535.0f) * SkNx_cast<float>((rh << 8) | (rh >> 8));
g = (1.0f / 65535.0f) * SkNx_cast<float>((gh << 8) | (gh >> 8));
b = (1.0f / 65535.0f) * SkNx_cast<float>((bh << 8) | (bh >> 8));
a = 1.0f;
}
STAGE_CTX(load_tables, const LoadTablesContext*) {
auto ptr = (const uint32_t*)ctx->fSrc + x;
@ -621,6 +638,25 @@ STAGE_CTX(load_tables_u16_be, const LoadTablesContext*) {
a = (1.0f / 65535.0f) * SkNx_cast<float>((ah << 8) | (ah >> 8));
}
STAGE_CTX(load_tables_rgb_u16_be, const LoadTablesContext*) {
auto ptr = (const uint16_t*)ctx->fSrc + 3*x;
const void* src = ptr;
uint16_t buf[N*3] = {0};
if (tail) {
memcpy(buf, src, tail*3*sizeof(uint16_t));
src = buf;
}
SkNh rh, gh, bh;
SkNh::Load3(src, &rh, &gh, &bh);
// ctx->fSrc is big-endian, so "& 0xff" grabs the 8 most significant bits of each component.
r = gather(tail, ctx->fR, SkNx_cast<int>(rh & 0xff));
g = gather(tail, ctx->fG, SkNx_cast<int>(gh & 0xff));
b = gather(tail, ctx->fB, SkNx_cast<int>(bh & 0xff));
a = 1.0f;
}
STAGE_CTX(store_tables, const StoreTablesContext*) {
auto ptr = ctx->fDst + x;