Apply linear gradient premul in 4f

(spotted by reed@)

Instead of converting to SkPMColor first, apply the premul in 4f and
then just store.

A modified bench shows a significant speedup:

  17/17  MB	1	690µs	695µs	695µs	700µs	0%	█▅▁▄▆▅▁▅▆▃	8888	gradient_linear_clamp_3color
  17/17  MB	1	832µs	837µs	839µs	870µs	1%	█▁▁▂▂▂▂▂▂▂	8888	gradient_linear_clamp_hicolor
  17/17  MB	1	651µs	659µs	665µs	701µs	3%	▆█▅▁▂▂▁▁▂▂	8888	gradient_linear_clamp

vs.

  17/17  MB	1	1.03ms	1.03ms	1.04ms	1.08ms	2%	██▇▁▁▁▁▁▁▁	8888	gradient_linear_clamp_3color
  17/17  MB	1	1.17ms	1.18ms	1.18ms	1.22ms	1%	█▄▂▁▁▁▁▁▁▁	8888	gradient_linear_clamp_hicolor
  17/17  MB	1	1.1ms	1.15ms	1.14ms	1.16ms	2%	▇▇▇▇▇▇██▁▁	8888	gradient_linear_clamp

R=reed@google.com,brianosman@google.com
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2409583003

Review-Url: https://codereview.chromium.org/2409583003
This commit is contained in:
fmalita 2016-12-06 08:57:47 -08:00 committed by Commit bot
parent 83926346f1
commit 0ce4f230eb

View File

@ -522,25 +522,62 @@ find_backward(const SkLinearGradient::LinearGradientContext::Rec rec[], float ti
return rec;
}
template <bool apply_alpha> SkPMColor trunc_from_255(const Sk4f& x) {
// As an optimization, we can apply the dither bias before interpolation -- but only when
// operating in premul space (apply_alpha == false). When apply_alpha == true, we must
// defer the bias application until after premul.
//
// The following two helpers encapsulate this logic: pre_bias is called before interpolation,
// and effects the bias when apply_alpha == false, while post_bias is called after premul and
// effects the bias for the apply_alpha == true case.
// Bias hook invoked before interpolation.
//
// Returns x + bias, except in the non-legacy build when apply_alpha is true: there the
// value is passed through untouched, because the bias has to be added after premul
// (post_bias takes care of that case).
template <bool apply_alpha>
Sk4f pre_bias(const Sk4f& x, const Sk4f& bias) {
#ifndef SK_SUPPORT_LEGACY_GRADIENT_PREMUL
    if (apply_alpha) {
        // Unpremul input: defer the bias until after the alpha multiply.
        return x;
    }
#endif
    return x + bias;
}
// Bias hook invoked after premul.
//
// Returns x + bias only in the non-legacy build when apply_alpha is true; in every other
// configuration x is returned unchanged (the bias is then the responsibility of pre_bias).
template <bool apply_alpha>
Sk4f post_bias(const Sk4f& x, const Sk4f& bias) {
#ifndef SK_SUPPORT_LEGACY_GRADIENT_PREMUL
    if (apply_alpha) {
        // The premul has already happened at this point, so the bias can be added now.
        return x + bias;
    }
#endif
    return x;
}
// Packs the 4f color |x| into an SkPMColor by narrowing its lanes to uint8_t.
//
// Default build:
//   - apply_alpha == true:  the first three lanes are multiplied by x[SkPM4f::A] * (1 / 255.f)
//     and the fourth lane by 1 (presumably the alpha lane -- confirm against SkPM4f), then
//     |bias| is added through post_bias before narrowing.
//   - apply_alpha == false: |x| is narrowed as-is; post_bias leaves it unchanged.
//
// With SK_SUPPORT_LEGACY_GRADIENT_PREMUL defined, |x| is narrowed first and, when apply_alpha
// is true, the packed value is premultiplied with SkPreMultiplyARGB. |bias| is not used there.
template <bool apply_alpha>
SkPMColor trunc_from_255(const Sk4f& x, const Sk4f& bias) {
#ifdef SK_SUPPORT_LEGACY_GRADIENT_PREMUL
    SkPMColor packed;
    SkNx_cast<uint8_t>(x).store(&packed);
    if (!apply_alpha) {
        return packed;
    }
    // Legacy path: premultiply on the already-packed 8-bit components.
    return SkPreMultiplyARGB(SkGetPackedA32(packed), SkGetPackedR32(packed),
                             SkGetPackedG32(packed), SkGetPackedB32(packed));
#else
    Sk4f scaled = x;
    if (apply_alpha) {
        // Premul in float space before any narrowing takes place.
        const float alphaScale = x[SkPM4f::A] * (1 / 255.f);
        scaled *= Sk4f(alphaScale, alphaScale, alphaScale, 1);
    }

    SkPMColor packed;
    SkNx_cast<uint8_t>(post_bias<apply_alpha>(scaled, bias)).store(&packed);
    return packed;
#endif
}
// Dithered fill: derives two packed colors from the same 4f color |c4| -- one using |bias0|,
// one using |bias1| -- and passes both to sk_memset32_dither together with |dst| and |count|.
// |c4| is taken without any bias added; pre_bias and trunc_from_255 decide where the bias
// is applied depending on apply_alpha.
template <bool apply_alpha> void fill(SkPMColor dst[], int count,
const Sk4f& c4, const Sk4f& c4other) {
sk_memset32_dither(dst, trunc_from_255<apply_alpha>(c4),
trunc_from_255<apply_alpha>(c4other), count);
const Sk4f& c4, const Sk4f& bias0, const Sk4f& bias1) {
const SkPMColor c0 = trunc_from_255<apply_alpha>(pre_bias<apply_alpha>(c4, bias0), bias0);
const SkPMColor c1 = trunc_from_255<apply_alpha>(pre_bias<apply_alpha>(c4, bias1), bias1);
sk_memset32_dither(dst, c0, c1, count);
}
// Solid fill: packs |c4| once with trunc_from_255 (passing a zero bias) and hands the result
// to sk_memset32 together with |dst| and |count|.
template <bool apply_alpha> void fill(SkPMColor dst[], int count, const Sk4f& c4) {
// Assumes that c4 does not need to be dithered.
sk_memset32(dst, trunc_from_255<apply_alpha>(c4), count);
sk_memset32(dst, trunc_from_255<apply_alpha>(c4, 0), count);
}
/*
@ -570,8 +607,8 @@ template <bool apply_alpha> void ramp(SkPMColor dstC[], int n, const Sk4f& c, co
const Sk4f& dither0, const Sk4f& dither1) {
Sk4f dc2 = dc + dc;
Sk4f dc4 = dc2 + dc2;
Sk4f cd0 = c + dither0;
Sk4f cd1 = c + dc + dither1;
Sk4f cd0 = pre_bias<apply_alpha>(c , dither0);
Sk4f cd1 = pre_bias<apply_alpha>(c + dc, dither1);
Sk4f cd2 = cd0 + dc2;
Sk4f cd3 = cd1 + dc2;
while (n >= 4) {
@ -579,10 +616,10 @@ template <bool apply_alpha> void ramp(SkPMColor dstC[], int n, const Sk4f& c, co
Sk4f_ToBytes((uint8_t*)dstC, cd0, cd1, cd2, cd3);
dstC += 4;
} else {
*dstC++ = trunc_from_255<apply_alpha>(cd0);
*dstC++ = trunc_from_255<apply_alpha>(cd1);
*dstC++ = trunc_from_255<apply_alpha>(cd2);
*dstC++ = trunc_from_255<apply_alpha>(cd3);
*dstC++ = trunc_from_255<apply_alpha>(cd0, dither0);
*dstC++ = trunc_from_255<apply_alpha>(cd1, dither1);
*dstC++ = trunc_from_255<apply_alpha>(cd2, dither0);
*dstC++ = trunc_from_255<apply_alpha>(cd3, dither1);
}
cd0 = cd0 + dc4;
cd1 = cd1 + dc4;
@ -591,12 +628,12 @@ template <bool apply_alpha> void ramp(SkPMColor dstC[], int n, const Sk4f& c, co
n -= 4;
}
if (n & 2) {
*dstC++ = trunc_from_255<apply_alpha>(cd0);
*dstC++ = trunc_from_255<apply_alpha>(cd1);
*dstC++ = trunc_from_255<apply_alpha>(cd0, dither0);
*dstC++ = trunc_from_255<apply_alpha>(cd1, dither1);
cd0 = cd0 + dc2;
}
if (n & 1) {
*dstC++ = trunc_from_255<apply_alpha>(cd0);
*dstC++ = trunc_from_255<apply_alpha>(cd0, dither0);
}
}
@ -742,9 +779,9 @@ void SkLinearGradient::LinearGradientContext::shade4_clamp(int x, int y, SkPMCol
const float pinFx = SkTPin(fx, 0.0f, 1.0f);
Sk4f c = lerp_color(pinFx, find_forward(fRecs.begin(), pinFx));
if (fApplyAlphaAfterInterp) {
fill<true>(dstC, count, c + dither0, c + dither1);
fill<true>(dstC, count, c, dither0, dither1);
} else {
fill<false>(dstC, count, c + dither0, c + dither1);
fill<false>(dstC, count, c, dither0, dither1);
}
return;
}