Correct sRGB <-> linear everywhere.

This trims the SkPM4fPriv methods down to just foolproof methods.
(Anything trying to build these itself is probably wrong.)

Helpers like Sk4f srgb_to_linear(Sk4f) can't really exist anymore,
at least not efficiently, so this refactor is somewhat more invasive
than you might think.  Generally this means callers of to_4f() are
also making a misstep, so to_4f() is gone too.

It also no longer makes sense to play games with linear floats in
bias-255 space.  That hack can't work with real sRGB coding.

Rather than update them, I've removed a couple of L32 xfermode fast
paths.  I'd even rather drop it entirely...

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2163683002
CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot

Review-Url: https://codereview.chromium.org/2163683002
This commit is contained in:
mtklein 2016-07-20 18:10:07 -07:00 committed by Commit bot
parent 456bf30d32
commit 0c902473d6
11 changed files with 165 additions and 288 deletions

View File

@ -19,13 +19,39 @@
#define INNER_LOOPS 10 #define INNER_LOOPS 10
static inline void brute_srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) {
auto d = Sk4f_fromS32(*dst),
s = Sk4f_fromS32( src);
*dst = Sk4f_toS32(s + d * (1.0f - s[3]));
}
static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) {
if (src >= 0xFF000000) {
*dst = src;
return;
}
brute_srcover_srgb_srgb_1(dst, src);
}
static void brute_force_srcover_srgb_srgb( static void brute_force_srcover_srgb_srgb(
uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
while (ndst > 0) { while (ndst > 0) {
int n = SkTMin(ndst, nsrc); int n = SkTMin(ndst, nsrc);
for (int i = 0; i < n; i++) { for (int i = 0; i < n; i++) {
srcover_blend_srgb8888_srgb_1(dst++, srgb_to_linear(to_4f(src[i]))); brute_srcover_srgb_srgb_1(dst++, src[i]);
}
ndst -= n;
}
}
static void trivial_srcover_srgb_srgb(
uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
while (ndst > 0) {
int n = SkTMin(ndst, nsrc);
for (int i = 0; i < n; i++) {
srcover_srgb_srgb_1(dst++, src[i]);
} }
ndst -= n; ndst -= n;
} }
@ -36,8 +62,8 @@ static void best_non_simd_srcover_srgb_srgb(
uint64_t* ddst = reinterpret_cast<uint64_t*>(dst); uint64_t* ddst = reinterpret_cast<uint64_t*>(dst);
auto srcover_srgb_srgb_2 = [](uint32_t* dst, const uint32_t* src) { auto srcover_srgb_srgb_2 = [](uint32_t* dst, const uint32_t* src) {
srcover_srgb8888_srgb_1(dst++, *src++); srcover_srgb_srgb_1(dst++, *src++);
srcover_srgb8888_srgb_1(dst, *src); srcover_srgb_srgb_1(dst, *src);
}; };
while (ndst >0) { while (ndst >0) {
@ -62,24 +88,12 @@ static void best_non_simd_srcover_srgb_srgb(
} while (dsrc < end); } while (dsrc < end);
if ((count & 1) != 0) { if ((count & 1) != 0) {
srcover_srgb8888_srgb_1(reinterpret_cast<uint32_t*>(ddst), srcover_srgb_srgb_1(reinterpret_cast<uint32_t*>(ddst),
*reinterpret_cast<const uint32_t*>(dsrc)); *reinterpret_cast<const uint32_t*>(dsrc));
} }
} }
} }
static void trivial_srcover_srgb_srgb(
uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
while (ndst > 0) {
int n = SkTMin(ndst, nsrc);
for (int i = 0; i < n; i++) {
srcover_srgb8888_srgb_1(dst++, src[i]);
}
ndst -= n;
}
}
class SrcOverVSkOptsBruteForce { class SrcOverVSkOptsBruteForce {
public: public:
static SkString Name() { return SkString{"VSkOptsBruteForce"}; } static SkString Name() { return SkString{"VSkOptsBruteForce"}; }

View File

@ -106,10 +106,7 @@ SkColor SkHSVToColor(U8CPU a, const SkScalar hsv[3]) {
#include "SkHalf.h" #include "SkHalf.h"
SkPM4f SkPM4f::FromPMColor(SkPMColor c) { SkPM4f SkPM4f::FromPMColor(SkPMColor c) {
Sk4f value = to_4f_rgba(c); return From4f(swizzle_rb_if_bgra(Sk4f_fromL32(c)));
SkPM4f c4;
(value * Sk4f(1.0f / 255)).store(&c4);
return c4;
} }
SkColor4f SkPM4f::unpremul() const { SkColor4f SkPM4f::unpremul() const {
@ -152,21 +149,14 @@ void SkPM4f::assertIsUnit() const {
/////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////
SkColor4f SkColor4f::FromColor(SkColor c) { SkColor4f SkColor4f::FromColor(SkColor bgra) {
Sk4f value = SkNx_shuffle<2,1,0,3>(SkNx_cast<float>(Sk4b::Load(&c))); SkColor4f rgba;
SkColor4f c4; swizzle_rb(Sk4f_fromS32(bgra)).store(rgba.vec());
(value * Sk4f(1.0f / 255)).store(&c4); return rgba;
c4.fR = srgb_to_linear(c4.fR);
c4.fG = srgb_to_linear(c4.fG);
c4.fB = srgb_to_linear(c4.fB);
return c4;
} }
SkColor SkColor4f::toSkColor() const { SkColor SkColor4f::toSkColor() const {
SkColor result; return Sk4f_toS32(swizzle_rb(Sk4f::Load(this->vec())));
Sk4f value = Sk4f(linear_to_srgb(fB), linear_to_srgb(fG), linear_to_srgb(fR), fA);
SkNx_cast<uint8_t>(value * Sk4f(255) + Sk4f(0.5f)).store(&result);
return result;
} }
SkColor4f SkColor4f::Pin(float r, float g, float b, float a) { SkColor4f SkColor4f::Pin(float r, float g, float b, float a) {

View File

@ -126,7 +126,7 @@ struct SkPMColorAdaptor {
return round(swizzle_rb_if_bgra(c4)); return round(swizzle_rb_if_bgra(c4));
} }
static Sk4f To4f(SkPMColor c) { static Sk4f To4f(SkPMColor c) {
return to_4f(c) * Sk4f(1.0f/255); return Sk4f_fromL32(c);
} }
}; };
void SkColorMatrixFilterRowMajor255::filterSpan(const SkPMColor src[], int count, SkPMColor dst[]) const { void SkColorMatrixFilterRowMajor255::filterSpan(const SkPMColor src[], int count, SkPMColor dst[]) const {

View File

@ -170,11 +170,10 @@ public:
PixelConverter(const SkPixmap& srcPixmap) { } PixelConverter(const SkPixmap& srcPixmap) { }
Sk4f toSk4f(Element pixel) const { Sk4f toSk4f(Element pixel) const {
float gray = pixel * (1.0f/255.0f); float gray = (gammaType == kSRGB_SkGammaType)
Sk4f result = Sk4f{gray, gray, gray, 1.0f}; ? sk_linear_from_srgb[pixel]
return gammaType == kSRGB_SkGammaType : pixel * (1/255.0f);
? srgb_to_linear(result) return {gray, gray, gray, 1.0f};
: result;
} }
}; };

View File

@ -10,142 +10,65 @@
#include "SkColorPriv.h" #include "SkColorPriv.h"
#include "SkPM4f.h" #include "SkPM4f.h"
#include "SkSRGB.h"
static inline float get_alpha(const Sk4f& f4) { static inline Sk4f set_alpha(const Sk4f& px, float alpha) {
return f4[SkPM4f::A]; return { px[0], px[1], px[2], alpha };
} }
static inline Sk4f set_alpha(const Sk4f& f4, float alpha) { static inline float get_alpha(const Sk4f& px) {
static_assert(3 == SkPM4f::A, ""); return px[3];
return Sk4f(f4[0], f4[1], f4[2], alpha);
} }
static inline uint32_t to_4b(const Sk4f& f4) {
uint32_t b4; static inline Sk4f Sk4f_fromL32(uint32_t px) {
SkNx_cast<uint8_t>(f4).store((uint8_t*)&b4); return SkNx_cast<float>(Sk4b::Load(&px)) * (1/255.0f);
return b4;
} }
static inline Sk4f to_4f(uint32_t b4) { static inline Sk4f Sk4f_fromS32(uint32_t px) {
return SkNx_cast<float>(Sk4b::Load((const uint8_t*)&b4)); return { sk_linear_from_srgb[(px >> 0) & 0xff],
sk_linear_from_srgb[(px >> 8) & 0xff],
sk_linear_from_srgb[(px >> 16) & 0xff],
(1/255.0f) * (px >> 24) };
} }
static inline Sk4f to_4f_rgba(uint32_t b4) { static inline uint32_t Sk4f_toL32(const Sk4f& px) {
return swizzle_rb_if_bgra(to_4f(b4)); uint32_t l32;
SkNx_cast<uint8_t>(Sk4f_round(px * 255.0f)).store(&l32);
return l32;
} }
static inline Sk4f srgb_to_linear(const Sk4f& s4) { static inline uint32_t Sk4f_toS32(const Sk4f& px) {
return set_alpha(s4 * s4, get_alpha(s4)); Sk4i rgb = sk_linear_to_srgb(px),
srgb = { rgb[0], rgb[1], rgb[2], (int)(255.0f * px[3] + 0.5f) };
uint32_t s32;
SkNx_cast<uint8_t>(srgb).store(&s32);
return s32;
} }
static inline Sk4f linear_to_srgb(const Sk4f& l4) {
return set_alpha(l4.rsqrt().invert(), get_alpha(l4));
}
static inline float srgb_to_linear(float x) { // SkColor handling:
return x * x;
}
static inline float linear_to_srgb(float x) {
return sqrtf(x);
}
static void assert_unit(float x) {
SkASSERT(x >= 0 && x <= 1);
}
static inline float exact_srgb_to_linear(float x) {
assert_unit(x);
float linear;
if (x <= 0.04045) {
linear = x / 12.92f;
} else {
linear = powf((x + 0.055f) / 1.055f, 2.4f);
}
assert_unit(linear);
return linear;
}
static inline float exact_linear_to_srgb(float x) {
assert_unit(x);
float srgb;
if (x <= 0.0031308f) {
srgb = x * 12.92f;
} else {
srgb = 1.055f * powf(x, 0.41666667f) - 0.055f;
}
assert_unit(srgb);
return srgb;
}
static inline Sk4f exact_srgb_to_linear(const Sk4f& x) {
Sk4f linear(exact_srgb_to_linear(x[0]),
exact_srgb_to_linear(x[1]),
exact_srgb_to_linear(x[2]), 1);
return set_alpha(linear, get_alpha(x));
}
static inline Sk4f exact_linear_to_srgb(const Sk4f& x) {
Sk4f srgb(exact_linear_to_srgb(x[0]),
exact_linear_to_srgb(x[1]),
exact_linear_to_srgb(x[2]), 1);
return set_alpha(srgb, get_alpha(x));
}
///////////////////////////////////////////////////////////////////////////////////////////////////
static inline Sk4f Sk4f_fromL32(uint32_t src) {
return to_4f(src) * Sk4f(1.0f/255);
}
static inline Sk4f Sk4f_fromS32(uint32_t src) {
return srgb_to_linear(to_4f(src) * Sk4f(1.0f/255));
}
// Color handling:
// SkColor has an ordering of (b, g, r, a) if cast to an Sk4f, so the code swizzles r and b to // SkColor has an ordering of (b, g, r, a) if cast to an Sk4f, so the code swizzles r and b to
// produce the needed (r, g, b, a) ordering. // produce the needed (r, g, b, a) ordering.
static inline Sk4f Sk4f_from_SkColor(SkColor color) { static inline Sk4f Sk4f_from_SkColor(SkColor color) {
return swizzle_rb(Sk4f_fromS32(color)); return swizzle_rb(Sk4f_fromS32(color));
} }
static inline uint32_t Sk4f_toL32(const Sk4f& x4) { static inline void assert_unit(float x) {
return to_4b(x4 * Sk4f(255) + Sk4f(0.5f)); SkASSERT(0 <= x && x <= 1);
} }
static inline uint32_t Sk4f_toS32(const Sk4f& x4) { static inline float exact_srgb_to_linear(float srgb) {
return to_4b(linear_to_srgb(x4) * Sk4f(255) + Sk4f(0.5f)); assert_unit(srgb);
} float linear;
if (srgb <= 0.04045) {
static inline Sk4f exact_Sk4f_fromS32(uint32_t src) { linear = srgb / 12.92f;
return exact_srgb_to_linear(to_4f(src) * Sk4f(1.0f/255)); } else {
} linear = powf((srgb + 0.055f) / 1.055f, 2.4f);
static inline uint32_t exact_Sk4f_toS32(const Sk4f& x4) {
return to_4b(exact_linear_to_srgb(x4) * Sk4f(255) + Sk4f(0.5f));
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// An implementation of SrcOver from bytes to bytes in linear space that takes advantage of the
// observation that the 255's cancel.
// invA = 1 - (As / 255);
//
// R = 255 * sqrt((Rs/255)^2 + (Rd/255)^2 * invA)
// => R = 255 * sqrt((Rs^2 + Rd^2 * invA)/255^2)
// => R = sqrt(Rs^2 + Rd^2 * invA)
// Note: src is assumed to be linear.
static inline void srcover_blend_srgb8888_srgb_1(uint32_t* dst, const Sk4f& src) {
Sk4f d = srgb_to_linear(to_4f(*dst));
Sk4f invAlpha = 1.0f - Sk4f{src[SkPM4f::A]} * (1.0f / 255.0f);
Sk4f r = linear_to_srgb(src + d * invAlpha) + 0.5f;
*dst = to_4b(r);
}
static inline void srcover_srgb8888_srgb_1(uint32_t* dst, const uint32_t pixel) {
if ((~pixel & 0xFF000000) == 0) {
*dst = pixel;
} else if ((pixel & 0xFF000000) != 0) {
srcover_blend_srgb8888_srgb_1(dst, srgb_to_linear(to_4f(pixel)));
} }
assert_unit(linear);
return linear;
} }
#endif #endif

View File

@ -22,7 +22,7 @@ static void load_l32(const SkPixmap& src, int x, int y, SkPM4f span[], int count
SkASSERT(src.addr32(x + count - 1, y)); SkASSERT(src.addr32(x + count - 1, y));
for (int i = 0; i < count; ++i) { for (int i = 0; i < count; ++i) {
(to_4f_rgba(addr[i]) * Sk4f(1.0f/255)).store(span[i].fVec); swizzle_rb_if_bgra(Sk4f_fromL32(addr[i])).store(span[i].fVec);
} }
} }
@ -32,7 +32,7 @@ static void load_s32(const SkPixmap& src, int x, int y, SkPM4f span[], int count
SkASSERT(src.addr32(x + count - 1, y)); SkASSERT(src.addr32(x + count - 1, y));
for (int i = 0; i < count; ++i) { for (int i = 0; i < count; ++i) {
srgb_to_linear(to_4f_rgba(addr[i]) * Sk4f(1.0f/255)).store(span[i].fVec); swizzle_rb_if_bgra(Sk4f_fromS32(addr[i])).store(span[i].fVec);
} }
} }

View File

@ -35,44 +35,40 @@ template <DstType D> Sk4f load_dst(SkPMColor dstC) {
return (D == kSRGB_Dst) ? Sk4f_fromS32(dstC) : Sk4f_fromL32(dstC); return (D == kSRGB_Dst) ? Sk4f_fromS32(dstC) : Sk4f_fromL32(dstC);
} }
static Sk4f srgb_4b_to_linear_unit(SkPMColor dstC) {
return Sk4f_fromS32(dstC);
}
template <DstType D> uint32_t store_dst(const Sk4f& x4) { template <DstType D> uint32_t store_dst(const Sk4f& x4) {
return (D == kSRGB_Dst) ? Sk4f_toS32(x4) : Sk4f_toL32(x4); return (D == kSRGB_Dst) ? Sk4f_toS32(x4) : Sk4f_toL32(x4);
} }
static Sk4f linear_unit_to_srgb_255f(const Sk4f& l4) { static Sk4x4f load_4_srgb(const void* vptr) {
return linear_to_srgb(l4) * Sk4f(255) + Sk4f(0.5f); auto ptr = (const uint32_t*)vptr;
Sk4x4f rgba;
rgba.r = { sk_linear_from_srgb[(ptr[0] >> 0) & 0xff],
sk_linear_from_srgb[(ptr[1] >> 0) & 0xff],
sk_linear_from_srgb[(ptr[2] >> 0) & 0xff],
sk_linear_from_srgb[(ptr[3] >> 0) & 0xff] };
rgba.g = { sk_linear_from_srgb[(ptr[0] >> 8) & 0xff],
sk_linear_from_srgb[(ptr[1] >> 8) & 0xff],
sk_linear_from_srgb[(ptr[2] >> 8) & 0xff],
sk_linear_from_srgb[(ptr[3] >> 8) & 0xff] };
rgba.b = { sk_linear_from_srgb[(ptr[0] >> 16) & 0xff],
sk_linear_from_srgb[(ptr[1] >> 16) & 0xff],
sk_linear_from_srgb[(ptr[2] >> 16) & 0xff],
sk_linear_from_srgb[(ptr[3] >> 16) & 0xff] };
rgba.a = SkNx_cast<float>((Sk4i::Load(ptr) >> 24) & 0xff) * (1/255.0f);
return rgba;
} }
// Load 4 interlaced 8888 sRGB pixels as an Sk4x4f, transposed and converted to float.
static Sk4x4f load_4_srgb(const void* ptr) {
auto p = Sk4x4f::Transpose((const uint8_t*)ptr);
// Scale to [0,1].
p.r *= 1/255.0f;
p.g *= 1/255.0f;
p.b *= 1/255.0f;
p.a *= 1/255.0f;
// Apply approximate sRGB gamma correction to convert to linear (as if gamma were 2).
p.r *= p.r;
p.g *= p.g;
p.b *= p.b;
return p;
}
// Store an Sk4x4f back to 4 interlaced 8888 sRGB pixels.
static void store_4_srgb(void* ptr, const Sk4x4f& p) { static void store_4_srgb(void* ptr, const Sk4x4f& p) {
// Convert back to sRGB and [0,255], again approximating sRGB as gamma == 2. ( sk_linear_to_srgb(p.r) << 0
auto r = p.r.rsqrt().invert() * 255.0f + 0.5f, | sk_linear_to_srgb(p.g) << 8
g = p.g.rsqrt().invert() * 255.0f + 0.5f, | sk_linear_to_srgb(p.b) << 16
b = p.b.rsqrt().invert() * 255.0f + 0.5f, | Sk4f_round(255.0f*p.a) << 24).store(ptr);
a = p.a * 255.0f + 0.5f;
Sk4x4f{r,g,b,a}.transpose((uint8_t*)ptr);
} }
/////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////
@ -197,41 +193,24 @@ template <DstType D> void src_1(const SkXfermode*, uint32_t dst[],
const Sk4f s4 = src->to4f_pmorder(); const Sk4f s4 = src->to4f_pmorder();
if (aa) { if (aa) {
if (D == kLinear_Dst) { SkPMColor srcColor = store_dst<D>(s4);
// operate in bias-255 space for src and dst while (count-- > 0) {
const Sk4f& s4_255 = s4 * Sk4f(255); SkAlpha cover = *aa++;
while (count >= 4) { switch (cover) {
Sk4f aa4 = SkNx_cast<float>(Sk4b::Load(aa)) * Sk4f(1/255.f); case 0xFF: {
Sk4f r0 = lerp(s4_255, to_4f(dst[0]), Sk4f(aa4[0])) + Sk4f(0.5f); *dst++ = srcColor;
Sk4f r1 = lerp(s4_255, to_4f(dst[1]), Sk4f(aa4[1])) + Sk4f(0.5f); break;
Sk4f r2 = lerp(s4_255, to_4f(dst[2]), Sk4f(aa4[2])) + Sk4f(0.5f); }
Sk4f r3 = lerp(s4_255, to_4f(dst[3]), Sk4f(aa4[3])) + Sk4f(0.5f); case 0x00: {
Sk4f_ToBytes((uint8_t*)dst, r0, r1, r2, r3); dst++;
break;
dst += 4; }
aa += 4; default: {
count -= 4; Sk4f d4 = load_dst<D>(*dst);
} *dst++ = store_dst<D>(lerp(s4, d4, cover));
} else { // kSRGB
SkPMColor srcColor = store_dst<D>(s4);
while (count-- > 0) {
SkAlpha cover = *aa++;
switch (cover) {
case 0xFF: {
*dst++ = srcColor;
break;
}
case 0x00: {
dst++;
break;
}
default: {
Sk4f d4 = load_dst<D>(*dst);
*dst++ = store_dst<D>(lerp(s4, d4, cover));
}
} }
} }
} // kSRGB }
} else { } else {
sk_memset32(dst, store_dst<D>(s4), count); sk_memset32(dst, store_dst<D>(s4), count);
} }
@ -274,18 +253,15 @@ template <DstType D> void srcover_n(const SkXfermode*, uint32_t dst[],
} else { } else {
while (count >= 4 && D == kSRGB_Dst) { while (count >= 4 && D == kSRGB_Dst) {
auto d = load_4_srgb(dst); auto d = load_4_srgb(dst);
auto s = Sk4x4f::Transpose(src->fVec); auto s = Sk4x4f::Transpose(src->fVec);
#if defined(SK_PMCOLOR_IS_BGRA) #if defined(SK_PMCOLOR_IS_BGRA)
SkTSwap(s.r, s.b); SkTSwap(s.r, s.b);
#endif #endif
auto invSA = 1.0f - s.a; auto invSA = 1.0f - s.a;
auto r = s.r + d.r * invSA, auto r = s.r + d.r * invSA,
g = s.g + d.g * invSA, g = s.g + d.g * invSA,
b = s.b + d.b * invSA, b = s.b + d.b * invSA,
a = s.a + d.a * invSA; a = s.a + d.a * invSA;
store_4_srgb(dst, Sk4x4f{r,g,b,a}); store_4_srgb(dst, Sk4x4f{r,g,b,a});
count -= 4; count -= 4;
dst += 4; dst += 4;
@ -322,23 +298,9 @@ static void srcover_linear_dst_1(const SkXfermode*, uint32_t dst[],
dst[i] = Sk4f_toL32(r4); dst[i] = Sk4f_toL32(r4);
} }
} else { } else {
const Sk4f s4_255 = s4 * Sk4f(255) + Sk4f(0.5f); // +0.5 to pre-bias for rounding
while (count >= 4) {
Sk4f d0 = to_4f(dst[0]);
Sk4f d1 = to_4f(dst[1]);
Sk4f d2 = to_4f(dst[2]);
Sk4f d3 = to_4f(dst[3]);
Sk4f_ToBytes((uint8_t*)dst,
s4_255 + d0 * dst_scale,
s4_255 + d1 * dst_scale,
s4_255 + d2 * dst_scale,
s4_255 + d3 * dst_scale);
dst += 4;
count -= 4;
}
for (int i = 0; i < count; ++i) { for (int i = 0; i < count; ++i) {
Sk4f d4 = to_4f(dst[i]); Sk4f d4 = Sk4f_fromL32(dst[i]);
dst[i] = to_4b(s4_255 + d4 * dst_scale); dst[i] = Sk4f_toL32(s4 + d4 * dst_scale);
} }
} }
} }
@ -354,7 +316,8 @@ static void srcover_srgb_dst_1(const SkXfermode*, uint32_t dst[],
if (0 == a) { if (0 == a) {
continue; continue;
} }
Sk4f d4 = srgb_4b_to_linear_unit(dst[i]);
Sk4f d4 = Sk4f_fromS32(dst[i]);
Sk4f r4; Sk4f r4;
if (a != 0xFF) { if (a != 0xFF) {
const Sk4f s4_aa = scale_by_coverage(s4, a); const Sk4f s4_aa = scale_by_coverage(s4, a);
@ -362,30 +325,27 @@ static void srcover_srgb_dst_1(const SkXfermode*, uint32_t dst[],
} else { } else {
r4 = s4 + d4 * dst_scale; r4 = s4 + d4 * dst_scale;
} }
dst[i] = to_4b(linear_unit_to_srgb_255f(r4)); dst[i] = Sk4f_toS32(r4);
} }
} else { } else {
while (count >= 4) { while (count >= 4) {
auto d = load_4_srgb(dst); auto d = load_4_srgb(dst);
auto s = Sk4x4f{{ src->r() }, { src->g() }, { src->b() }, { src->a() }}; auto s = Sk4x4f{{ src->r() }, { src->g() }, { src->b() }, { src->a() }};
#if defined(SK_PMCOLOR_IS_BGRA) #if defined(SK_PMCOLOR_IS_BGRA)
SkTSwap(s.r, s.b); SkTSwap(s.r, s.b);
#endif #endif
auto invSA = 1.0f - s.a; auto invSA = 1.0f - s.a;
auto r = s.r + d.r * invSA, auto r = s.r + d.r * invSA,
g = s.g + d.g * invSA, g = s.g + d.g * invSA,
b = s.b + d.b * invSA, b = s.b + d.b * invSA,
a = s.a + d.a * invSA; a = s.a + d.a * invSA;
store_4_srgb(dst, Sk4x4f{r,g,b,a}); store_4_srgb(dst, Sk4x4f{r,g,b,a});
count -= 4; count -= 4;
dst += 4; dst += 4;
} }
for (int i = 0; i < count; ++i) { for (int i = 0; i < count; ++i) {
Sk4f d4 = srgb_4b_to_linear_unit(dst[i]); Sk4f d4 = Sk4f_fromS32(dst[i]);
dst[i] = to_4b(linear_unit_to_srgb_255f(s4 + d4 * dst_scale)); dst[i] = Sk4f_toS32(s4 + d4 * dst_scale);
} }
} }
} }
@ -443,26 +403,13 @@ template <DstType D>
void src_1_lcd(uint32_t dst[], const SkPM4f* src, int count, const uint16_t lcd[]) { void src_1_lcd(uint32_t dst[], const SkPM4f* src, int count, const uint16_t lcd[]) {
const Sk4f s4 = src->to4f_pmorder(); const Sk4f s4 = src->to4f_pmorder();
if (D == kLinear_Dst) { for (int i = 0; i < count; ++i) {
// operate in bias-255 space for src and dst uint16_t rgb = lcd[i];
const Sk4f s4bias = s4 * Sk4f(255); if (0 == rgb) {
for (int i = 0; i < count; ++i) { continue;
uint16_t rgb = lcd[i];
if (0 == rgb) {
continue;
}
Sk4f d4bias = to_4f(dst[i]);
dst[i] = to_4b(lerp(s4bias, d4bias, lcd16_to_unit_4f(rgb))) | (SK_A32_MASK << SK_A32_SHIFT);
}
} else { // kSRGB
for (int i = 0; i < count; ++i) {
uint16_t rgb = lcd[i];
if (0 == rgb) {
continue;
}
Sk4f d4 = load_dst<D>(dst[i]);
dst[i] = store_dst<D>(lerp(s4, d4, lcd16_to_unit_4f(rgb))) | (SK_A32_MASK << SK_A32_SHIFT);
} }
Sk4f d4 = load_dst<D>(dst[i]);
dst[i] = store_dst<D>(lerp(s4, d4, lcd16_to_unit_4f(rgb))) | (SK_A32_MASK << SK_A32_SHIFT);
} }
} }

View File

@ -109,18 +109,16 @@ struct DstTraits<DstType::S32, premul> {
using Type = SkPMColor; using Type = SkPMColor;
static Sk4f load(const SkPM4f& c) { static Sk4f load(const SkPM4f& c) {
// Prescaling by (255^2, 255^2, 255^2, 255) on load, to avoid a 255 multiply on return c.to4f_pmorder();
// each store (S32 conversion yields a uniform 255 factor).
return c.to4f_pmorder() * Sk4f(255 * 255, 255 * 255, 255 * 255, 255);
} }
static void store(const Sk4f& c, Type* dst) { static void store(const Sk4f& c, Type* dst) {
// FIXME: this assumes opaque colors. Handle unpremultiplication. // FIXME: this assumes opaque colors. Handle unpremultiplication.
*dst = to_4b(linear_to_srgb(PM::apply(c))); *dst = Sk4f_toS32(PM::apply(c));
} }
static void store(const Sk4f& c, Type* dst, int n) { static void store(const Sk4f& c, Type* dst, int n) {
sk_memset32(dst, to_4b(linear_to_srgb(PM::apply(c))), n); sk_memset32(dst, Sk4f_toS32(PM::apply(c)), n);
} }
static void store4x(const Sk4f& c0, const Sk4f& c1, static void store4x(const Sk4f& c0, const Sk4f& c1,

View File

@ -53,13 +53,10 @@ void ramp<DstType::S32, ApplyPremul::False>(const Sk4f& c, const Sk4f& dc, SkPMC
Sk4x4f c4x = Sk4x4f::Transpose(c, c + dc, c + dc * 2, c + dc * 3); Sk4x4f c4x = Sk4x4f::Transpose(c, c + dc, c + dc * 2, c + dc * 3);
while (n >= 4) { while (n >= 4) {
const Sk4x4f cx4s32 = { ( sk_linear_to_srgb(c4x.r) << 0
c4x.r.rsqrt().invert(), | sk_linear_to_srgb(c4x.g) << 8
c4x.g.rsqrt().invert(), | sk_linear_to_srgb(c4x.b) << 16
c4x.b.rsqrt().invert(), | Sk4f_round(255.0f*c4x.a) << 24).store(dst);
c4x.a
};
cx4s32.transpose((uint8_t*)dst);
c4x.r += dc4x.r; c4x.r += dc4x.r;
c4x.g += dc4x.g; c4x.g += dc4x.g;

View File

@ -21,19 +21,21 @@ ninja -C out/Release dm nanobench ; and ./out/Release/dm --match Blend_opts ; an
namespace SK_OPTS_NS { namespace SK_OPTS_NS {
static inline void srcover_srgb8888_srgb_1(uint32_t* dst, const uint32_t pixel) { static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) {
if ((~pixel & 0xFF000000) == 0) { if (src >= 0xFF000000) {
*dst = pixel; *dst = src;
} else if ((pixel & 0xFF000000) != 0) { return;
srcover_blend_srgb8888_srgb_1(dst, srgb_to_linear(to_4f(pixel)));
} }
auto d = Sk4f_fromS32(*dst),
s = Sk4f_fromS32( src);
*dst = Sk4f_toS32(s + d * (1.0f - s[3]));
} }
static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) { static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) {
srcover_srgb8888_srgb_1(dst++, *src++); srcover_srgb_srgb_1(dst++, *src++);
srcover_srgb8888_srgb_1(dst++, *src++); srcover_srgb_srgb_1(dst++, *src++);
srcover_srgb8888_srgb_1(dst++, *src++); srcover_srgb_srgb_1(dst++, *src++);
srcover_srgb8888_srgb_1(dst, *src); srcover_srgb_srgb_1(dst , *src );
} }
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
@ -87,7 +89,7 @@ static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) {
count = count & 3; count = count & 3;
while (count-- > 0) { while (count-- > 0) {
srcover_srgb8888_srgb_1(dst++, *src++); srcover_srgb_srgb_1(dst++, *src++);
} }
} }
} }
@ -159,7 +161,7 @@ static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) {
count = count & 3; count = count & 3;
while (count-- > 0) { while (count-- > 0) {
srcover_srgb8888_srgb_1(dst++, *src++); srcover_srgb_srgb_1(dst++, *src++);
} }
} }
} }
@ -172,7 +174,7 @@ static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) {
int n = SkTMin(ndst, nsrc); int n = SkTMin(ndst, nsrc);
for (int i = 0; i < n; i++) { for (int i = 0; i < n; i++) {
srcover_srgb8888_srgb_1(dst++, src[i]); srcover_srgb_srgb_1(dst++, src[i]);
} }
ndst -= n; ndst -= n;
} }

View File

@ -19,13 +19,19 @@
typedef void (*Blender)(uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc); typedef void (*Blender)(uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc);
static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) {
auto d = Sk4f_fromS32(*dst),
s = Sk4f_fromS32( src);
*dst = Sk4f_toS32(s + d * (1.0f - s[3]));
}
static void brute_force_srcover_srgb_srgb( static void brute_force_srcover_srgb_srgb(
uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
while (ndst > 0) { while (ndst > 0) {
int n = SkTMin(ndst, nsrc); int n = SkTMin(ndst, nsrc);
for (int i = 0; i < n; i++) { for (int i = 0; i < n; i++) {
srcover_blend_srgb8888_srgb_1(dst++, srgb_to_linear(to_4f(src[i]))); srcover_srgb_srgb_1(dst++, src[i]);
} }
ndst -= n; ndst -= n;
} }
@ -63,6 +69,7 @@ static void test_blender(std::string resourceName, skiatest::Reporter* reporter)
SkAutoTArray<uint32_t> testDst(width); SkAutoTArray<uint32_t> testDst(width);
for (int y = 0; y < pixmap.height(); y++) { for (int y = 0; y < pixmap.height(); y++) {
// TODO: zero is not the most interesting dst to test srcover...
sk_bzero(correctDst.get(), width * sizeof(uint32_t)); sk_bzero(correctDst.get(), width * sizeof(uint32_t));
sk_bzero(testDst.get(), width * sizeof(uint32_t)); sk_bzero(testDst.get(), width * sizeof(uint32_t));
brute_force_srcover_srgb_srgb(correctDst.get(), src, width, width); brute_force_srcover_srgb_srgb(correctDst.get(), src, width, width);