Reland "Respect full precision for RGB16 PNGs" (part 2)
This lands all the new xform hooks but makes no changes to src/codec, so the new decode features remain turned off. I'm relanding this in pieces to try to bisect a strange MSAN error. Original CL: https://skia-review.googlesource.com/c/7085/ BUG=skia: CQ_INCLUDE_TRYBOTS=skia.primary:Test-Ubuntu-Clang-GCE-CPU-AVX2-x86_64-Debug-MSAN,Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD,Build-Ubuntu-Clang-x86_64-Release-Fast Change-Id: I451a2a29c73ca475e9e7a5ded58d4948d6b8be19 Reviewed-on: https://skia-review.googlesource.com/7277 Reviewed-by: Matt Sarett <msarett@google.com> Commit-Queue: Matt Sarett <msarett@google.com>
This commit is contained in:
parent
f720098671
commit
5bee0b6de6
@ -29,6 +29,7 @@ public:
|
||||
kBGRA_8888_ColorFormat,
|
||||
|
||||
// Unsigned, big-endian, 16-bit integer
|
||||
kRGB_U16_BE_ColorFormat, // Src only
|
||||
kRGBA_U16_BE_ColorFormat, // Src only
|
||||
|
||||
kRGBA_F16_ColorFormat, // Dst only
|
||||
|
@ -1104,7 +1104,10 @@ bool SkColorSpaceXform_XYZ<kCSM>
|
||||
}
|
||||
}
|
||||
|
||||
if (kRGBA_F32_ColorFormat == dstColorFormat || kRGBA_U16_BE_ColorFormat == srcColorFormat) {
|
||||
if (kRGBA_F32_ColorFormat == dstColorFormat ||
|
||||
kRGBA_U16_BE_ColorFormat == srcColorFormat ||
|
||||
kRGB_U16_BE_ColorFormat == srcColorFormat)
|
||||
{
|
||||
return this->applyPipeline(dstColorFormat, dst, srcColorFormat, src, len, alphaType);
|
||||
}
|
||||
|
||||
@ -1221,6 +1224,24 @@ bool SkColorSpaceXform_XYZ<kCSM>
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case kRGB_U16_BE_ColorFormat:
|
||||
switch (fSrcGamma) {
|
||||
case kLinear_SrcGamma:
|
||||
pipeline.append(SkRasterPipeline::load_rgb_u16_be, &src);
|
||||
break;
|
||||
case kSRGB_SrcGamma:
|
||||
pipeline.append(SkRasterPipeline::load_rgb_u16_be, &src);
|
||||
pipeline.append_from_srgb(kUnpremul_SkAlphaType);
|
||||
break;
|
||||
case kTable_SrcGamma:
|
||||
loadTables.fSrc = src;
|
||||
loadTables.fR = fSrcGammaTables[0];
|
||||
loadTables.fG = fSrcGammaTables[1];
|
||||
loadTables.fB = fSrcGammaTables[2];
|
||||
pipeline.append(SkRasterPipeline::load_tables_rgb_u16_be, &loadTables);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
@ -31,6 +31,9 @@ bool SkColorSpaceXform_A2B::onApply(ColorFormat dstFormat, void* dst, ColorForma
|
||||
case kRGBA_U16_BE_ColorFormat:
|
||||
pipeline.append(SkRasterPipeline::load_u16_be, &src);
|
||||
break;
|
||||
case kRGB_U16_BE_ColorFormat:
|
||||
pipeline.append(SkRasterPipeline::load_rgb_u16_be, &src);
|
||||
break;
|
||||
default:
|
||||
SkCSXformPrintf("F16/F32 source color format not supported\n");
|
||||
return false;
|
||||
|
@ -68,6 +68,16 @@ struct SkNx {
|
||||
*c = SkNx{cl, ch};
|
||||
*d = SkNx{dl, dh};
|
||||
}
|
||||
// Loads three vectors from vptr by delegating to the half-width type Half:
// the first Half::Load3 call produces the low halves (al, bl, cl), the second
// produces the high halves (ah, bh, ch), and each output is then assembled as
// SkNx{lo, hi}.
// NOTE(review): presumably the memory is interleaved (a0 b0 c0 a1 b1 c1 ...),
// which is what the SkNx<1,T> base case reads -- confirm against callers.
AI static void Load3(const void* vptr, SkNx* a, SkNx* b, SkNx* c) {
|
||||
// Use a byte pointer so the offset for the second half can be given in bytes.
auto ptr = (const char*)vptr;
|
||||
Half al, bl, cl,
|
||||
ah, bh, ch;
|
||||
Half::Load3(ptr , &al, &bl, &cl);
|
||||
// The high half begins 3*N/2 elements of T (three components for each of
// the N/2 lanes handled by the first call) past the start.
Half::Load3(ptr + 3*N/2*sizeof(T), &ah, &bh, &ch);
|
||||
*a = SkNx{al, ah};
|
||||
*b = SkNx{bl, bh};
|
||||
*c = SkNx{cl, ch};
|
||||
}
|
||||
AI static void Store4(void* vptr, const SkNx& a, const SkNx& b, const SkNx& c, const SkNx& d) {
|
||||
auto ptr = (char*)vptr;
|
||||
Half::Store4(ptr, a.fLo, b.fLo, c.fLo, d.fLo);
|
||||
@ -149,6 +159,12 @@ struct SkNx<1,T> {
|
||||
*c = Load(ptr + 2*sizeof(T));
|
||||
*d = Load(ptr + 3*sizeof(T));
|
||||
}
|
||||
// Single-lane Load3: reads three consecutive T values starting at vptr and
// stores them into *a, *b and *c respectively (byte offsets 0, sizeof(T) and
// 2*sizeof(T)).
AI static void Load3(const void* vptr, SkNx* a, SkNx* b, SkNx* c) {
|
||||
// Byte pointer so the sizeof(T)-scaled offsets below are byte offsets.
auto ptr = (const char*)vptr;
|
||||
*a = Load(ptr + 0*sizeof(T));
|
||||
*b = Load(ptr + 1*sizeof(T));
|
||||
*c = Load(ptr + 2*sizeof(T));
|
||||
}
|
||||
AI static void Store4(void* vptr, const SkNx& a, const SkNx& b, const SkNx& c, const SkNx& d) {
|
||||
auto ptr = (char*)vptr;
|
||||
a.store(ptr + 0*sizeof(T));
|
||||
|
@ -69,8 +69,9 @@
|
||||
M(load_565) M(store_565) \
|
||||
M(load_f16) M(store_f16) \
|
||||
M(load_8888) M(store_8888) \
|
||||
M(load_u16_be) \
|
||||
M(load_tables) M(load_tables_u16_be) M(store_tables) \
|
||||
M(load_u16_be) M(load_rgb_u16_be) \
|
||||
M(load_tables_u16_be) M(load_tables_rgb_u16_be) \
|
||||
M(load_tables) M(store_tables) \
|
||||
M(scale_u8) M(scale_1_float) \
|
||||
M(lerp_u8) M(lerp_565) M(lerp_1_float) \
|
||||
M(dstatop) M(dstin) M(dstout) M(dstover) \
|
||||
|
@ -241,7 +241,12 @@ public:
|
||||
*b = rgba.val[2];
|
||||
*a = rgba.val[3];
|
||||
}
|
||||
|
||||
// NEON Load3 for four 16-bit lanes: vld3_u16 reads 3-element interleaved
// uint16_t structures from ptr and de-interleaves them into three vectors,
// which are handed back as *r, *g and *b.
AI static void Load3(const void* ptr, SkNx* r, SkNx* g, SkNx* b) {
|
||||
// Despite its name, "rgba" holds only three channels here (uint16x4x3_t).
uint16x4x3_t rgba = vld3_u16((const uint16_t*)ptr);
|
||||
*r = rgba.val[0];
|
||||
*g = rgba.val[1];
|
||||
*b = rgba.val[2];
|
||||
}
|
||||
AI static void Store4(void* dst, const SkNx& r, const SkNx& g, const SkNx& b, const SkNx& a) {
|
||||
uint16x4x4_t rgba = {{
|
||||
r.fVec,
|
||||
|
@ -271,6 +271,22 @@ public:
|
||||
*b = ba;
|
||||
*a = _mm_srli_si128(ba, 8);
|
||||
}
|
||||
// SSE Load3 for four 16-bit lanes: de-interleaves four packed 3x16-bit pixels
// (24 bytes starting at ptr) into *r, *g and *b using two unaligned 16-byte
// loads plus byte shifts and unpacks. The two loads cover bytes 0..15 and
// 8..23, so nothing beyond the 24 input bytes is read.
AI static void Load3(const void* ptr, SkNx* r, SkNx* g, SkNx* b) {
|
||||
// The idea here is to get 4 vectors that are R G B _ _ _ _ _.
|
||||
// The second load is at a funny location to make sure we don't read past
|
||||
// the bounds of memory. This is fine, we just need to shift it a little bit.
|
||||
const uint8_t* ptr8 = (const uint8_t*) ptr;
|
||||
// rgb0 starts at pixel 0 (byte 0).
__m128i rgb0 = _mm_loadu_si128((const __m128i*) (ptr8 + 0));
|
||||
// Shifting right by one pixel (3 lanes * 2 bytes) makes rgb1 start at pixel 1.
__m128i rgb1 = _mm_srli_si128(rgb0, 3*2);
|
||||
// Load from byte 8, then drop 4 bytes so rgb2 starts at byte 12 == pixel 2.
__m128i rgb2 = _mm_srli_si128(_mm_loadu_si128((const __m128i*) (ptr8 + 4*2)), 2*2);
|
||||
// One more pixel-sized shift makes rgb3 start at pixel 3.
__m128i rgb3 = _mm_srli_si128(rgb2, 3*2);
|
||||

|
||||
// Interleave 16-bit lanes: r0 r1 g0 g1 b0 b1 _ _.
__m128i rrggbb01 = _mm_unpacklo_epi16(rgb0, rgb1);
|
||||
// Likewise for pixels 2 and 3: r2 r3 g2 g3 b2 b3 _ _.
__m128i rrggbb23 = _mm_unpacklo_epi16(rgb2, rgb3);
|
||||
// Low 32-bit pairs interleaved: r0 r1 r2 r3 g0 g1 g2 g3.
*r = _mm_unpacklo_epi32(rrggbb01, rrggbb23);
|
||||
// Green sits in the upper 8 bytes of *r; shift it down to the low lanes.
*g = _mm_srli_si128(r->fVec, 4*2);
|
||||
// High 32-bit pairs interleaved: b0 b1 b2 b3 followed by leftover lanes.
*b = _mm_unpackhi_epi32(rrggbb01, rrggbb23);
|
||||
}
|
||||
AI static void Store4(void* dst, const SkNx& r, const SkNx& g, const SkNx& b, const SkNx& a) {
|
||||
__m128i rg = _mm_unpacklo_epi16(r.fVec, g.fVec);
|
||||
__m128i ba = _mm_unpacklo_epi16(b.fVec, a.fVec);
|
||||
@ -334,6 +350,32 @@ public:
|
||||
*b = _mm_unpacklo_epi64(ba0123, ba4567);
|
||||
*a = _mm_unpackhi_epi64(ba0123, ba4567);
|
||||
}
|
||||
// Load3 for eight 16-bit lanes: de-interleaves eight packed 3x16-bit pixels
// (48 bytes starting at ptr) into *r, *g and *b. Four unaligned 16-byte loads
// are issued at byte offsets 0, 12, 24 and 32; the last one is taken early and
// shifted so that the highest byte read is byte 47.
AI static void Load3(const void* ptr, SkNx* r, SkNx* g, SkNx* b) {
|
||||
// TODO: AVX2 version
|
||||
const uint8_t* ptr8 = (const uint8_t*) ptr;
|
||||
// Each even-numbered vector starts at a pixel boundary (6 bytes per pixel);
// the following odd-numbered vector is the same data shifted down one pixel.
__m128i rgb0 = _mm_loadu_si128((const __m128i*) (ptr8 + 0*2));
|
||||
__m128i rgb1 = _mm_srli_si128(rgb0, 3*2);
|
||||
__m128i rgb2 = _mm_loadu_si128((const __m128i*) (ptr8 + 6*2));
|
||||
__m128i rgb3 = _mm_srli_si128(rgb2, 3*2);
|
||||
__m128i rgb4 = _mm_loadu_si128((const __m128i*) (ptr8 + 12*2));
|
||||
__m128i rgb5 = _mm_srli_si128(rgb4, 3*2);
|
||||
// Load from byte 32 and drop 4 bytes so rgb6 starts at byte 36 == pixel 6.
__m128i rgb6 = _mm_srli_si128(_mm_loadu_si128((const __m128i*) (ptr8 + 16*2)), 2*2);
|
||||
__m128i rgb7 = _mm_srli_si128(rgb6, 3*2);
|
||||

|
||||
// Pairwise 16-bit interleave: rN rN+1 gN gN+1 bN bN+1 _ _.
__m128i rgb01 = _mm_unpacklo_epi16(rgb0, rgb1);
|
||||
__m128i rgb23 = _mm_unpacklo_epi16(rgb2, rgb3);
|
||||
__m128i rgb45 = _mm_unpacklo_epi16(rgb4, rgb5);
|
||||
__m128i rgb67 = _mm_unpacklo_epi16(rgb6, rgb7);
|
||||

|
||||
// 32-bit interleave: rg03 = r0..r3 g0..g3, bx03 = b0..b3 plus leftover lanes
// (and the same for pixels 4..7).
__m128i rg03 = _mm_unpacklo_epi32(rgb01, rgb23);
|
||||
__m128i bx03 = _mm_unpackhi_epi32(rgb01, rgb23);
|
||||
__m128i rg47 = _mm_unpacklo_epi32(rgb45, rgb67);
|
||||
__m128i bx47 = _mm_unpackhi_epi32(rgb45, rgb67);
|
||||

|
||||
// 64-bit interleave gathers all eight values of each channel.
*r = _mm_unpacklo_epi64(rg03, rg47);
|
||||
*g = _mm_unpackhi_epi64(rg03, rg47);
|
||||
*b = _mm_unpacklo_epi64(bx03, bx47);
|
||||
}
|
||||
AI static void Store4(void* ptr, const SkNx& r, const SkNx& g, const SkNx& b, const SkNx& a) {
|
||||
// TODO: AVX2 version
|
||||
__m128i rg0123 = _mm_unpacklo_epi16(r.fVec, g.fVec), // r0 g0 r1 g1 r2 g2 r3 g3
|
||||
|
@ -591,6 +591,23 @@ STAGE_CTX(load_u16_be, const uint64_t**) {
|
||||
a = (1.0f / 65535.0f) * SkNx_cast<float>((ah << 8) | (ah >> 8));
|
||||
}
|
||||
|
||||
// Pipeline stage: loads 3-channel, 16-bit big-endian pixels and produces
// floating-point r, g, b scaled by 1/65535, with a set to 1.0f.
// NOTE(review): x, tail, N and the r/g/b/a outputs are supplied by the
// STAGE_CTX macro, which is not visible here -- confirm their exact meaning.
STAGE_CTX(load_rgb_u16_be, const uint16_t**) {
|
||||
// Three uint16_t values per pixel, so pixel x starts 3*x elements in.
auto ptr = *ctx + 3*x;
|
||||
const void* src = ptr;
|
||||
// Zero-filled scratch space for a partial batch.
uint16_t buf[N*3] = {0};
|
||||
if (tail) {
|
||||
// Copy only the tail pixels into the scratch buffer and load from there, so
// the full-width Load3 below stays within memory we own.
memcpy(buf, src, tail*3*sizeof(uint16_t));
|
||||
src = buf;
|
||||
}
|
||||

|
||||
SkNh rh, gh, bh;
|
||||
SkNh::Load3(src, &rh, &gh, &bh);
|
||||
// (h << 8) | (h >> 8) swaps the two bytes of each 16-bit value before it is
// converted to float and normalized.
r = (1.0f / 65535.0f) * SkNx_cast<float>((rh << 8) | (rh >> 8));
|
||||
g = (1.0f / 65535.0f) * SkNx_cast<float>((gh << 8) | (gh >> 8));
|
||||
b = (1.0f / 65535.0f) * SkNx_cast<float>((bh << 8) | (bh >> 8));
|
||||
// The source has no alpha channel; alpha is set to 1.0f.
a = 1.0f;
|
||||
}
|
||||
|
||||
STAGE_CTX(load_tables, const LoadTablesContext*) {
|
||||
auto ptr = (const uint32_t*)ctx->fSrc + x;
|
||||
|
||||
@ -621,6 +638,25 @@ STAGE_CTX(load_tables_u16_be, const LoadTablesContext*) {
|
||||
a = (1.0f / 65535.0f) * SkNx_cast<float>((ah << 8) | (ah >> 8));
|
||||
}
|
||||
|
||||
// Pipeline stage: loads 3-channel, 16-bit big-endian pixels from ctx->fSrc and
// maps each channel through the per-channel tables ctx->fR / ctx->fG / ctx->fB
// using an 8-bit index taken from each 16-bit value; a is set to 1.0f.
// NOTE(review): x, tail, N, gather() and the r/g/b/a outputs come from code
// not visible here -- confirm their exact meaning.
STAGE_CTX(load_tables_rgb_u16_be, const LoadTablesContext*) {
|
||||
// Three uint16_t values per pixel, so pixel x starts 3*x elements in.
auto ptr = (const uint16_t*)ctx->fSrc + 3*x;
|
||||
const void* src = ptr;
|
||||
// Zero-filled scratch space for a partial batch.
uint16_t buf[N*3] = {0};
|
||||
if (tail) {
|
||||
// Copy only the tail pixels into the scratch buffer and load from there, so
// the full-width Load3 below stays within memory we own.
memcpy(buf, src, tail*3*sizeof(uint16_t));
|
||||
src = buf;
|
||||
}
|
||||

|
||||
SkNh rh, gh, bh;
|
||||
SkNh::Load3(src, &rh, &gh, &bh);
|
||||

|
||||
// ctx->fSrc is big-endian, so "& 0xff" grabs the 8 most significant bits of each component.
|
||||
r = gather(tail, ctx->fR, SkNx_cast<int>(rh & 0xff));
|
||||
g = gather(tail, ctx->fG, SkNx_cast<int>(gh & 0xff));
|
||||
b = gather(tail, ctx->fB, SkNx_cast<int>(bh & 0xff));
|
||||
// The source has no alpha channel; alpha is set to 1.0f.
a = 1.0f;
|
||||
}
|
||||
|
||||
STAGE_CTX(store_tables, const StoreTablesContext*) {
|
||||
auto ptr = ctx->fDst + x;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user