Correct sRGB <-> linear everywhere.

This trims the SkPM4fPriv methods down to just foolproof methods.
(Anything trying to build these itself is probably wrong.)

Helpers like Sk4f srgb_to_linear(Sk4f) can't really exist anymore,
at least not efficiently, so this refactor is somewhat more invasive
than you might expect.  Code that calls to_4f() directly is generally
making the same misstep, so to_4f() is gone too.

It also does not make sense to try to play games with linear floats
with 255 bias any more.  That hack can't work with real sRGB coding.

Rather than update them, I've removed a couple of L32 xfermode fast
paths.  I'd even rather drop it entirely...

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2163683002
CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot

Review-Url: https://codereview.chromium.org/2163683002
This commit is contained in:
mtklein 2016-07-20 18:10:07 -07:00 committed by Commit bot
parent 456bf30d32
commit 0c902473d6
11 changed files with 165 additions and 288 deletions

View File

@ -19,13 +19,39 @@
#define INNER_LOOPS 10
// Blends one sRGB-encoded source pixel over *dst with no shortcuts: both pixels are decoded
// with Sk4f_fromS32, combined as s + d * (1 - s[3]), and re-encoded with Sk4f_toS32.
static inline void brute_srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) {
    const auto dstPx = Sk4f_fromS32(*dst);
    const auto srcPx = Sk4f_fromS32(src);
    const float keepDst = 1.0f - srcPx[3];   // lane 3 is used as the source alpha
    *dst = Sk4f_toS32(srcPx + dstPx * keepDst);
}
// Single-pixel srcover with a shortcut for opaque sources.  src >= 0xFF000000 holds exactly
// when the top byte of src is 0xFF; such a pixel simply overwrites *dst.  Every other pixel
// goes through brute_srcover_srgb_srgb_1.
static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) {
    const bool srcIsOpaque = src >= 0xFF000000;
    if (srcIsOpaque) {
        *dst = src;
    } else {
        brute_srcover_srgb_srgb_1(dst, src);
    }
}
static void brute_force_srcover_srgb_srgb(
uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
while (ndst > 0) {
int n = SkTMin(ndst, nsrc);
for (int i = 0; i < n; i++) {
srcover_blend_srgb8888_srgb_1(dst++, srgb_to_linear(to_4f(src[i])));
brute_srcover_srgb_srgb_1(dst++, src[i]);
}
ndst -= n;
}
}
static void trivial_srcover_srgb_srgb(
uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
while (ndst > 0) {
int n = SkTMin(ndst, nsrc);
for (int i = 0; i < n; i++) {
srcover_srgb_srgb_1(dst++, src[i]);
}
ndst -= n;
}
@ -36,8 +62,8 @@ static void best_non_simd_srcover_srgb_srgb(
uint64_t* ddst = reinterpret_cast<uint64_t*>(dst);
auto srcover_srgb_srgb_2 = [](uint32_t* dst, const uint32_t* src) {
srcover_srgb8888_srgb_1(dst++, *src++);
srcover_srgb8888_srgb_1(dst, *src);
srcover_srgb_srgb_1(dst++, *src++);
srcover_srgb_srgb_1(dst, *src);
};
while (ndst >0) {
@ -62,24 +88,12 @@ static void best_non_simd_srcover_srgb_srgb(
} while (dsrc < end);
if ((count & 1) != 0) {
srcover_srgb8888_srgb_1(reinterpret_cast<uint32_t*>(ddst),
*reinterpret_cast<const uint32_t*>(dsrc));
srcover_srgb_srgb_1(reinterpret_cast<uint32_t*>(ddst),
*reinterpret_cast<const uint32_t*>(dsrc));
}
}
}
// Applies srcover_srgb8888_srgb_1 to ndst destination pixels, one at a time.  The nsrc source
// pixels are reused from the start each time they run out, so dst may be longer than src.
//
// A non-positive nsrc leaves nothing to blend.  Without the guard below, n would never be
// positive, ndst would never shrink, and the outer loop would not terminate.
static void trivial_srcover_srgb_srgb(
        uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
    if (nsrc <= 0) {
        return;
    }
    while (ndst > 0) {
        const int n = SkTMin(ndst, nsrc);
        for (int i = 0; i < n; i++) {
            srcover_srgb8888_srgb_1(dst++, src[i]);
        }
        ndst -= n;
    }
}
class SrcOverVSkOptsBruteForce {
public:
static SkString Name() { return SkString{"VSkOptsBruteForce"}; }

View File

@ -106,10 +106,7 @@ SkColor SkHSVToColor(U8CPU a, const SkScalar hsv[3]) {
#include "SkHalf.h"
SkPM4f SkPM4f::FromPMColor(SkPMColor c) {
Sk4f value = to_4f_rgba(c);
SkPM4f c4;
(value * Sk4f(1.0f / 255)).store(&c4);
return c4;
return From4f(swizzle_rb_if_bgra(Sk4f_fromL32(c)));
}
SkColor4f SkPM4f::unpremul() const {
@ -152,21 +149,14 @@ void SkPM4f::assertIsUnit() const {
///////////////////////////////////////////////////////////////////////////////////////////////////
SkColor4f SkColor4f::FromColor(SkColor c) {
Sk4f value = SkNx_shuffle<2,1,0,3>(SkNx_cast<float>(Sk4b::Load(&c)));
SkColor4f c4;
(value * Sk4f(1.0f / 255)).store(&c4);
c4.fR = srgb_to_linear(c4.fR);
c4.fG = srgb_to_linear(c4.fG);
c4.fB = srgb_to_linear(c4.fB);
return c4;
SkColor4f SkColor4f::FromColor(SkColor bgra) {
SkColor4f rgba;
swizzle_rb(Sk4f_fromS32(bgra)).store(rgba.vec());
return rgba;
}
SkColor SkColor4f::toSkColor() const {
SkColor result;
Sk4f value = Sk4f(linear_to_srgb(fB), linear_to_srgb(fG), linear_to_srgb(fR), fA);
SkNx_cast<uint8_t>(value * Sk4f(255) + Sk4f(0.5f)).store(&result);
return result;
return Sk4f_toS32(swizzle_rb(Sk4f::Load(this->vec())));
}
SkColor4f SkColor4f::Pin(float r, float g, float b, float a) {

View File

@ -126,7 +126,7 @@ struct SkPMColorAdaptor {
return round(swizzle_rb_if_bgra(c4));
}
static Sk4f To4f(SkPMColor c) {
return to_4f(c) * Sk4f(1.0f/255);
return Sk4f_fromL32(c);
}
};
void SkColorMatrixFilterRowMajor255::filterSpan(const SkPMColor src[], int count, SkPMColor dst[]) const {

View File

@ -170,11 +170,10 @@ public:
PixelConverter(const SkPixmap& srcPixmap) { }
Sk4f toSk4f(Element pixel) const {
float gray = pixel * (1.0f/255.0f);
Sk4f result = Sk4f{gray, gray, gray, 1.0f};
return gammaType == kSRGB_SkGammaType
? srgb_to_linear(result)
: result;
float gray = (gammaType == kSRGB_SkGammaType)
? sk_linear_from_srgb[pixel]
: pixel * (1/255.0f);
return {gray, gray, gray, 1.0f};
}
};

View File

@ -10,142 +10,65 @@
#include "SkColorPriv.h"
#include "SkPM4f.h"
#include "SkSRGB.h"
static inline float get_alpha(const Sk4f& f4) {
return f4[SkPM4f::A];
static inline Sk4f set_alpha(const Sk4f& px, float alpha) {
return { px[0], px[1], px[2], alpha };
}
static inline Sk4f set_alpha(const Sk4f& f4, float alpha) {
static_assert(3 == SkPM4f::A, "");
return Sk4f(f4[0], f4[1], f4[2], alpha);
static inline float get_alpha(const Sk4f& px) {
return px[3];
}
static inline uint32_t to_4b(const Sk4f& f4) {
uint32_t b4;
SkNx_cast<uint8_t>(f4).store((uint8_t*)&b4);
return b4;
static inline Sk4f Sk4f_fromL32(uint32_t px) {
return SkNx_cast<float>(Sk4b::Load(&px)) * (1/255.0f);
}
static inline Sk4f to_4f(uint32_t b4) {
return SkNx_cast<float>(Sk4b::Load((const uint8_t*)&b4));
static inline Sk4f Sk4f_fromS32(uint32_t px) {
return { sk_linear_from_srgb[(px >> 0) & 0xff],
sk_linear_from_srgb[(px >> 8) & 0xff],
sk_linear_from_srgb[(px >> 16) & 0xff],
(1/255.0f) * (px >> 24) };
}
static inline Sk4f to_4f_rgba(uint32_t b4) {
return swizzle_rb_if_bgra(to_4f(b4));
static inline uint32_t Sk4f_toL32(const Sk4f& px) {
uint32_t l32;
SkNx_cast<uint8_t>(Sk4f_round(px * 255.0f)).store(&l32);
return l32;
}
static inline Sk4f srgb_to_linear(const Sk4f& s4) {
return set_alpha(s4 * s4, get_alpha(s4));
static inline uint32_t Sk4f_toS32(const Sk4f& px) {
Sk4i rgb = sk_linear_to_srgb(px),
srgb = { rgb[0], rgb[1], rgb[2], (int)(255.0f * px[3] + 0.5f) };
uint32_t s32;
SkNx_cast<uint8_t>(srgb).store(&s32);
return s32;
}
// Approximate linear -> sRGB for a whole pixel: rsqrt().invert() is used as a square root
// (treating sRGB as gamma 2), and the alpha lane of the input is carried over untouched.
static inline Sk4f linear_to_srgb(const Sk4f& l4) {
    const Sk4f roots = l4.rsqrt().invert();
    return set_alpha(roots, get_alpha(l4));
}
// Approximate sRGB -> linear for one value: the transfer curve is treated as a plain square.
static inline float srgb_to_linear(float x) {
    const float squared = x * x;
    return squared;
}
// Approximate linear -> sRGB for one value: the transfer curve is treated as a square root.
static inline float linear_to_srgb(float x) {
    const float root = sqrtf(x);
    return root;
}
// Checks, via SkASSERT, that x lies in the closed interval [0, 1].
static void assert_unit(float x) {
SkASSERT(x >= 0 && x <= 1);
}
// Decodes one sRGB-encoded value to linear with the piecewise curve: x / 12.92 for inputs up
// to 0.04045, and ((x + 0.055) / 1.055)^2.4 above that.  Both the input and the result are
// checked with assert_unit.
static inline float exact_srgb_to_linear(float x) {
    assert_unit(x);
    const float linear = (x <= 0.04045) ? x / 12.92f
                                        : powf((x + 0.055f) / 1.055f, 2.4f);
    assert_unit(linear);
    return linear;
}
// Encodes one linear value as sRGB with the piecewise curve: x * 12.92 for inputs up to
// 0.0031308, and 1.055 * x^0.41666667 - 0.055 above that.  Both the input and the result are
// checked with assert_unit.
static inline float exact_linear_to_srgb(float x) {
    assert_unit(x);
    const float srgb = (x <= 0.0031308f) ? x * 12.92f
                                         : 1.055f * powf(x, 0.41666667f) - 0.055f;
    assert_unit(srgb);
    return srgb;
}
// Pixel-wide version of the exact decode: lanes 0..2 go through the scalar
// exact_srgb_to_linear one at a time, and the alpha lane of x is copied into the result.
static inline Sk4f exact_srgb_to_linear(const Sk4f& x) {
    return set_alpha(Sk4f(exact_srgb_to_linear(x[0]),
                          exact_srgb_to_linear(x[1]),
                          exact_srgb_to_linear(x[2]), 1),
                     get_alpha(x));
}
// Pixel-wide version of the exact encode: lanes 0..2 go through the scalar
// exact_linear_to_srgb one at a time, and the alpha lane of x is copied into the result.
static inline Sk4f exact_linear_to_srgb(const Sk4f& x) {
    return set_alpha(Sk4f(exact_linear_to_srgb(x[0]),
                          exact_linear_to_srgb(x[1]),
                          exact_linear_to_srgb(x[2]), 1),
                     get_alpha(x));
}
///////////////////////////////////////////////////////////////////////////////////////////////////
// Unpacks a 32-bit pixel into four floats and scales every lane by 1/255.
// No gamma conversion is applied.
static inline Sk4f Sk4f_fromL32(uint32_t src) {
    const Sk4f bytes = to_4f(src);
    return bytes * Sk4f(1.0f/255);
}
// Unpacks a 32-bit pixel, scales every lane by 1/255, then runs the result through the
// Sk4f overload of srgb_to_linear.
static inline Sk4f Sk4f_fromS32(uint32_t src) {
    const Sk4f unit = to_4f(src) * Sk4f(1.0f/255);
    return srgb_to_linear(unit);
}
// Color handling:
// SkColor handling:
// SkColor has an ordering of (b, g, r, a) if cast to an Sk4f, so the code swizzles r and b to
// produce the needed (r, g, b, a) ordering.
static inline Sk4f Sk4f_from_SkColor(SkColor color) {
    // Decode first (lanes come out in b, g, r, a order), then swap r and b.
    const Sk4f bgra = Sk4f_fromS32(color);
    return swizzle_rb(bgra);
}
static inline uint32_t Sk4f_toL32(const Sk4f& x4) {
return to_4b(x4 * Sk4f(255) + Sk4f(0.5f));
static inline void assert_unit(float x) {
SkASSERT(0 <= x && x <= 1);
}
static inline uint32_t Sk4f_toS32(const Sk4f& x4) {
return to_4b(linear_to_srgb(x4) * Sk4f(255) + Sk4f(0.5f));
}
// Same unpack-and-scale as Sk4f_fromS32, but decodes with exact_srgb_to_linear.
static inline Sk4f exact_Sk4f_fromS32(uint32_t src) {
    const Sk4f unit = to_4f(src) * Sk4f(1.0f/255);
    return exact_srgb_to_linear(unit);
}
static inline uint32_t exact_Sk4f_toS32(const Sk4f& x4) {
return to_4b(exact_linear_to_srgb(x4) * Sk4f(255) + Sk4f(0.5f));
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// An implementation of SrcOver from bytes to bytes in linear space that takes advantage of the
// observation that the 255's cancel.
// invA = 1 - (As / 255);
//
// R = 255 * sqrt((Rs/255)^2 + (Rd/255)^2 * invA)
// => R = 255 * sqrt((Rs^2 + Rd^2 * invA)/255^2)
// => R = sqrt(Rs^2 + Rd^2 * invA)
// Note: src is assumed to be linear.
static inline void srcover_blend_srgb8888_srgb_1(uint32_t* dst, const Sk4f& src) {
Sk4f d = srgb_to_linear(to_4f(*dst));
Sk4f invAlpha = 1.0f - Sk4f{src[SkPM4f::A]} * (1.0f / 255.0f);
Sk4f r = linear_to_srgb(src + d * invAlpha) + 0.5f;
*dst = to_4b(r);
}
static inline void srcover_srgb8888_srgb_1(uint32_t* dst, const uint32_t pixel) {
if ((~pixel & 0xFF000000) == 0) {
*dst = pixel;
} else if ((pixel & 0xFF000000) != 0) {
srcover_blend_srgb8888_srgb_1(dst, srgb_to_linear(to_4f(pixel)));
static inline float exact_srgb_to_linear(float srgb) {
assert_unit(srgb);
float linear;
if (srgb <= 0.04045) {
linear = srgb / 12.92f;
} else {
linear = powf((srgb + 0.055f) / 1.055f, 2.4f);
}
assert_unit(linear);
return linear;
}
#endif

View File

@ -22,7 +22,7 @@ static void load_l32(const SkPixmap& src, int x, int y, SkPM4f span[], int count
SkASSERT(src.addr32(x + count - 1, y));
for (int i = 0; i < count; ++i) {
(to_4f_rgba(addr[i]) * Sk4f(1.0f/255)).store(span[i].fVec);
swizzle_rb_if_bgra(Sk4f_fromL32(addr[i])).store(span[i].fVec);
}
}
@ -32,7 +32,7 @@ static void load_s32(const SkPixmap& src, int x, int y, SkPM4f span[], int count
SkASSERT(src.addr32(x + count - 1, y));
for (int i = 0; i < count; ++i) {
srgb_to_linear(to_4f_rgba(addr[i]) * Sk4f(1.0f/255)).store(span[i].fVec);
swizzle_rb_if_bgra(Sk4f_fromS32(addr[i])).store(span[i].fVec);
}
}

View File

@ -35,44 +35,40 @@ template <DstType D> Sk4f load_dst(SkPMColor dstC) {
return (D == kSRGB_Dst) ? Sk4f_fromS32(dstC) : Sk4f_fromL32(dstC);
}
// Thin alias: forwards dstC straight to Sk4f_fromS32 and returns its result.
static Sk4f srgb_4b_to_linear_unit(SkPMColor dstC) {
return Sk4f_fromS32(dstC);
}
// Packs x4 into a 32-bit pixel for destination type D: Sk4f_toS32 when D is kSRGB_Dst,
// Sk4f_toL32 for any other D.
template <DstType D> uint32_t store_dst(const Sk4f& x4) {
return (D == kSRGB_Dst) ? Sk4f_toS32(x4) : Sk4f_toL32(x4);
}
static Sk4f linear_unit_to_srgb_255f(const Sk4f& l4) {
return linear_to_srgb(l4) * Sk4f(255) + Sk4f(0.5f);
// Loads four consecutive 32-bit pixels from vptr and returns them as an Sk4x4f: each of
// r, g, b and a holds that channel for pixels 0..3.
// The code reads ptr[0] through ptr[3], so vptr must point at (at least) four pixels.
static Sk4x4f load_4_srgb(const void* vptr) {
auto ptr = (const uint32_t*)vptr;
Sk4x4f rgba;
// Color channels: each byte (bits 0-7, 8-15, 16-23) indexes sk_linear_from_srgb.
// NOTE(review): presumably a 256-entry sRGB -> linear float table from SkSRGB.h -- confirm.
rgba.r = { sk_linear_from_srgb[(ptr[0] >> 0) & 0xff],
sk_linear_from_srgb[(ptr[1] >> 0) & 0xff],
sk_linear_from_srgb[(ptr[2] >> 0) & 0xff],
sk_linear_from_srgb[(ptr[3] >> 0) & 0xff] };
rgba.g = { sk_linear_from_srgb[(ptr[0] >> 8) & 0xff],
sk_linear_from_srgb[(ptr[1] >> 8) & 0xff],
sk_linear_from_srgb[(ptr[2] >> 8) & 0xff],
sk_linear_from_srgb[(ptr[3] >> 8) & 0xff] };
rgba.b = { sk_linear_from_srgb[(ptr[0] >> 16) & 0xff],
sk_linear_from_srgb[(ptr[1] >> 16) & 0xff],
sk_linear_from_srgb[(ptr[2] >> 16) & 0xff],
sk_linear_from_srgb[(ptr[3] >> 16) & 0xff] };
// Alpha (bits 24-31) skips the table: all four are loaded at once and scaled by 1/255.
rgba.a = SkNx_cast<float>((Sk4i::Load(ptr) >> 24) & 0xff) * (1/255.0f);
return rgba;
}
// Load 4 interlaced 8888 sRGB pixels as an Sk4x4f, transposed and converted to float.
// All four channels are scaled by 1/255; r, g and b are then squared, while a is left as is.
static Sk4x4f load_4_srgb(const void* ptr) {
auto p = Sk4x4f::Transpose((const uint8_t*)ptr);
// Scale to [0,1].
p.r *= 1/255.0f;
p.g *= 1/255.0f;
p.b *= 1/255.0f;
p.a *= 1/255.0f;
// Apply approximate sRGB gamma correction to convert to linear (as if gamma were 2).
// Alpha is deliberately not included here.
p.r *= p.r;
p.g *= p.g;
p.b *= p.b;
return p;
}
// Store an Sk4x4f back to 4 interlaced 8888 sRGB pixels.
static void store_4_srgb(void* ptr, const Sk4x4f& p) {
// Convert back to sRGB and [0,255], again approximating sRGB as gamma == 2.
auto r = p.r.rsqrt().invert() * 255.0f + 0.5f,
g = p.g.rsqrt().invert() * 255.0f + 0.5f,
b = p.b.rsqrt().invert() * 255.0f + 0.5f,
a = p.a * 255.0f + 0.5f;
Sk4x4f{r,g,b,a}.transpose((uint8_t*)ptr);
( sk_linear_to_srgb(p.r) << 0
| sk_linear_to_srgb(p.g) << 8
| sk_linear_to_srgb(p.b) << 16
| Sk4f_round(255.0f*p.a) << 24).store(ptr);
}
///////////////////////////////////////////////////////////////////////////////////////////////////
@ -197,41 +193,24 @@ template <DstType D> void src_1(const SkXfermode*, uint32_t dst[],
const Sk4f s4 = src->to4f_pmorder();
if (aa) {
if (D == kLinear_Dst) {
// operate in bias-255 space for src and dst
const Sk4f& s4_255 = s4 * Sk4f(255);
while (count >= 4) {
Sk4f aa4 = SkNx_cast<float>(Sk4b::Load(aa)) * Sk4f(1/255.f);
Sk4f r0 = lerp(s4_255, to_4f(dst[0]), Sk4f(aa4[0])) + Sk4f(0.5f);
Sk4f r1 = lerp(s4_255, to_4f(dst[1]), Sk4f(aa4[1])) + Sk4f(0.5f);
Sk4f r2 = lerp(s4_255, to_4f(dst[2]), Sk4f(aa4[2])) + Sk4f(0.5f);
Sk4f r3 = lerp(s4_255, to_4f(dst[3]), Sk4f(aa4[3])) + Sk4f(0.5f);
Sk4f_ToBytes((uint8_t*)dst, r0, r1, r2, r3);
dst += 4;
aa += 4;
count -= 4;
}
} else { // kSRGB
SkPMColor srcColor = store_dst<D>(s4);
while (count-- > 0) {
SkAlpha cover = *aa++;
switch (cover) {
case 0xFF: {
*dst++ = srcColor;
break;
}
case 0x00: {
dst++;
break;
}
default: {
Sk4f d4 = load_dst<D>(*dst);
*dst++ = store_dst<D>(lerp(s4, d4, cover));
}
SkPMColor srcColor = store_dst<D>(s4);
while (count-- > 0) {
SkAlpha cover = *aa++;
switch (cover) {
case 0xFF: {
*dst++ = srcColor;
break;
}
case 0x00: {
dst++;
break;
}
default: {
Sk4f d4 = load_dst<D>(*dst);
*dst++ = store_dst<D>(lerp(s4, d4, cover));
}
}
} // kSRGB
}
} else {
sk_memset32(dst, store_dst<D>(s4), count);
}
@ -274,18 +253,15 @@ template <DstType D> void srcover_n(const SkXfermode*, uint32_t dst[],
} else {
while (count >= 4 && D == kSRGB_Dst) {
auto d = load_4_srgb(dst);
auto s = Sk4x4f::Transpose(src->fVec);
#if defined(SK_PMCOLOR_IS_BGRA)
SkTSwap(s.r, s.b);
#endif
auto invSA = 1.0f - s.a;
auto r = s.r + d.r * invSA,
g = s.g + d.g * invSA,
b = s.b + d.b * invSA,
a = s.a + d.a * invSA;
store_4_srgb(dst, Sk4x4f{r,g,b,a});
count -= 4;
dst += 4;
@ -322,23 +298,9 @@ static void srcover_linear_dst_1(const SkXfermode*, uint32_t dst[],
dst[i] = Sk4f_toL32(r4);
}
} else {
const Sk4f s4_255 = s4 * Sk4f(255) + Sk4f(0.5f); // +0.5 to pre-bias for rounding
while (count >= 4) {
Sk4f d0 = to_4f(dst[0]);
Sk4f d1 = to_4f(dst[1]);
Sk4f d2 = to_4f(dst[2]);
Sk4f d3 = to_4f(dst[3]);
Sk4f_ToBytes((uint8_t*)dst,
s4_255 + d0 * dst_scale,
s4_255 + d1 * dst_scale,
s4_255 + d2 * dst_scale,
s4_255 + d3 * dst_scale);
dst += 4;
count -= 4;
}
for (int i = 0; i < count; ++i) {
Sk4f d4 = to_4f(dst[i]);
dst[i] = to_4b(s4_255 + d4 * dst_scale);
Sk4f d4 = Sk4f_fromL32(dst[i]);
dst[i] = Sk4f_toL32(s4 + d4 * dst_scale);
}
}
}
@ -354,7 +316,8 @@ static void srcover_srgb_dst_1(const SkXfermode*, uint32_t dst[],
if (0 == a) {
continue;
}
Sk4f d4 = srgb_4b_to_linear_unit(dst[i]);
Sk4f d4 = Sk4f_fromS32(dst[i]);
Sk4f r4;
if (a != 0xFF) {
const Sk4f s4_aa = scale_by_coverage(s4, a);
@ -362,30 +325,27 @@ static void srcover_srgb_dst_1(const SkXfermode*, uint32_t dst[],
} else {
r4 = s4 + d4 * dst_scale;
}
dst[i] = to_4b(linear_unit_to_srgb_255f(r4));
dst[i] = Sk4f_toS32(r4);
}
} else {
while (count >= 4) {
auto d = load_4_srgb(dst);
auto s = Sk4x4f{{ src->r() }, { src->g() }, { src->b() }, { src->a() }};
#if defined(SK_PMCOLOR_IS_BGRA)
SkTSwap(s.r, s.b);
#endif
auto invSA = 1.0f - s.a;
auto r = s.r + d.r * invSA,
g = s.g + d.g * invSA,
b = s.b + d.b * invSA,
a = s.a + d.a * invSA;
store_4_srgb(dst, Sk4x4f{r,g,b,a});
count -= 4;
dst += 4;
}
for (int i = 0; i < count; ++i) {
Sk4f d4 = srgb_4b_to_linear_unit(dst[i]);
dst[i] = to_4b(linear_unit_to_srgb_255f(s4 + d4 * dst_scale));
Sk4f d4 = Sk4f_fromS32(dst[i]);
dst[i] = Sk4f_toS32(s4 + d4 * dst_scale);
}
}
}
@ -443,26 +403,13 @@ template <DstType D>
void src_1_lcd(uint32_t dst[], const SkPM4f* src, int count, const uint16_t lcd[]) {
const Sk4f s4 = src->to4f_pmorder();
if (D == kLinear_Dst) {
// operate in bias-255 space for src and dst
const Sk4f s4bias = s4 * Sk4f(255);
for (int i = 0; i < count; ++i) {
uint16_t rgb = lcd[i];
if (0 == rgb) {
continue;
}
Sk4f d4bias = to_4f(dst[i]);
dst[i] = to_4b(lerp(s4bias, d4bias, lcd16_to_unit_4f(rgb))) | (SK_A32_MASK << SK_A32_SHIFT);
}
} else { // kSRGB
for (int i = 0; i < count; ++i) {
uint16_t rgb = lcd[i];
if (0 == rgb) {
continue;
}
Sk4f d4 = load_dst<D>(dst[i]);
dst[i] = store_dst<D>(lerp(s4, d4, lcd16_to_unit_4f(rgb))) | (SK_A32_MASK << SK_A32_SHIFT);
for (int i = 0; i < count; ++i) {
uint16_t rgb = lcd[i];
if (0 == rgb) {
continue;
}
Sk4f d4 = load_dst<D>(dst[i]);
dst[i] = store_dst<D>(lerp(s4, d4, lcd16_to_unit_4f(rgb))) | (SK_A32_MASK << SK_A32_SHIFT);
}
}

View File

@ -109,18 +109,16 @@ struct DstTraits<DstType::S32, premul> {
using Type = SkPMColor;
static Sk4f load(const SkPM4f& c) {
// Prescaling by (255^2, 255^2, 255^2, 255) on load, to avoid a 255 multiply on
// each store (S32 conversion yields a uniform 255 factor).
return c.to4f_pmorder() * Sk4f(255 * 255, 255 * 255, 255 * 255, 255);
return c.to4f_pmorder();
}
static void store(const Sk4f& c, Type* dst) {
// FIXME: this assumes opaque colors. Handle unpremultiplication.
*dst = to_4b(linear_to_srgb(PM::apply(c)));
*dst = Sk4f_toS32(PM::apply(c));
}
static void store(const Sk4f& c, Type* dst, int n) {
sk_memset32(dst, to_4b(linear_to_srgb(PM::apply(c))), n);
sk_memset32(dst, Sk4f_toS32(PM::apply(c)), n);
}
static void store4x(const Sk4f& c0, const Sk4f& c1,

View File

@ -53,13 +53,10 @@ void ramp<DstType::S32, ApplyPremul::False>(const Sk4f& c, const Sk4f& dc, SkPMC
Sk4x4f c4x = Sk4x4f::Transpose(c, c + dc, c + dc * 2, c + dc * 3);
while (n >= 4) {
const Sk4x4f cx4s32 = {
c4x.r.rsqrt().invert(),
c4x.g.rsqrt().invert(),
c4x.b.rsqrt().invert(),
c4x.a
};
cx4s32.transpose((uint8_t*)dst);
( sk_linear_to_srgb(c4x.r) << 0
| sk_linear_to_srgb(c4x.g) << 8
| sk_linear_to_srgb(c4x.b) << 16
| Sk4f_round(255.0f*c4x.a) << 24).store(dst);
c4x.r += dc4x.r;
c4x.g += dc4x.g;

View File

@ -21,19 +21,21 @@ ninja -C out/Release dm nanobench ; and ./out/Release/dm --match Blend_opts ; an
namespace SK_OPTS_NS {
static inline void srcover_srgb8888_srgb_1(uint32_t* dst, const uint32_t pixel) {
if ((~pixel & 0xFF000000) == 0) {
*dst = pixel;
} else if ((pixel & 0xFF000000) != 0) {
srcover_blend_srgb8888_srgb_1(dst, srgb_to_linear(to_4f(pixel)));
// Blends one sRGB pixel over *dst.  A source whose top byte is 0xFF (src >= 0xFF000000) is
// stored directly; anything else is decoded with Sk4f_fromS32, blended as
// s + d * (1 - s[3]), and re-encoded with Sk4f_toS32.
static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) {
    if (src < 0xFF000000) {
        const auto dstPx = Sk4f_fromS32(*dst);
        const auto srcPx = Sk4f_fromS32(src);
        *dst = Sk4f_toS32(srcPx + dstPx * (1.0f - srcPx[3]));
        return;
    }
    *dst = src;
}
static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) {
srcover_srgb8888_srgb_1(dst++, *src++);
srcover_srgb8888_srgb_1(dst++, *src++);
srcover_srgb8888_srgb_1(dst++, *src++);
srcover_srgb8888_srgb_1(dst, *src);
srcover_srgb_srgb_1(dst++, *src++);
srcover_srgb_srgb_1(dst++, *src++);
srcover_srgb_srgb_1(dst++, *src++);
srcover_srgb_srgb_1(dst , *src );
}
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
@ -87,7 +89,7 @@ static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) {
count = count & 3;
while (count-- > 0) {
srcover_srgb8888_srgb_1(dst++, *src++);
srcover_srgb_srgb_1(dst++, *src++);
}
}
}
@ -159,7 +161,7 @@ static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) {
count = count & 3;
while (count-- > 0) {
srcover_srgb8888_srgb_1(dst++, *src++);
srcover_srgb_srgb_1(dst++, *src++);
}
}
}
@ -172,7 +174,7 @@ static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) {
int n = SkTMin(ndst, nsrc);
for (int i = 0; i < n; i++) {
srcover_srgb8888_srgb_1(dst++, src[i]);
srcover_srgb_srgb_1(dst++, src[i]);
}
ndst -= n;
}

View File

@ -19,13 +19,19 @@
typedef void (*Blender)(uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc);
// Reference single-pixel srcover with no shortcuts: decode both pixels with Sk4f_fromS32,
// blend as s + d * (1 - s[3]), and re-encode with Sk4f_toS32.
static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) {
    const auto dstPx = Sk4f_fromS32(*dst);
    const auto srcPx = Sk4f_fromS32(src);
    const float keepDst = 1.0f - srcPx[3];   // lane 3 is used as the source alpha
    *dst = Sk4f_toS32(srcPx + dstPx * keepDst);
}
static void brute_force_srcover_srgb_srgb(
uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
while (ndst > 0) {
int n = SkTMin(ndst, nsrc);
for (int i = 0; i < n; i++) {
srcover_blend_srgb8888_srgb_1(dst++, srgb_to_linear(to_4f(src[i])));
srcover_srgb_srgb_1(dst++, src[i]);
}
ndst -= n;
}
@ -63,6 +69,7 @@ static void test_blender(std::string resourceName, skiatest::Reporter* reporter)
SkAutoTArray<uint32_t> testDst(width);
for (int y = 0; y < pixmap.height(); y++) {
// TODO: zero is not the most interesting dst to test srcover...
sk_bzero(correctDst.get(), width * sizeof(uint32_t));
sk_bzero(testDst.get(), width * sizeof(uint32_t));
brute_force_srcover_srgb_srgb(correctDst.get(), src, width, width);