Faster 4f gradient premul path
Similar to https://codereview.chromium.org/2409583003/, perform the premul in 4f. It turns out that, in this case, it's even faster to skip the multiplication by 255 at load time. Also includes some template plumbing, because DstTraits<>::load now needs to be premul-aware (previously it wasn't). R=reed@google.com GOLD_TRYBOT_URL=https://gold.skia.org/search?issue=2416233002 Review-Url: https://codereview.chromium.org/2416233002
This commit is contained in:
parent
11abd8d6cb
commit
3a2e45a6ed
@ -326,7 +326,7 @@ GradientShaderBase4fContext::shadeSpanInternal(int x, int y,
|
|||||||
int count) const {
|
int count) const {
|
||||||
static const int kBufSize = 128;
|
static const int kBufSize = 128;
|
||||||
SkScalar ts[kBufSize];
|
SkScalar ts[kBufSize];
|
||||||
TSampler<dstType, tileMode> sampler(*this);
|
TSampler<dstType, premul, tileMode> sampler(*this);
|
||||||
|
|
||||||
SkASSERT(count > 0);
|
SkASSERT(count > 0);
|
||||||
do {
|
do {
|
||||||
@ -341,7 +341,7 @@ GradientShaderBase4fContext::shadeSpanInternal(int x, int y,
|
|||||||
} while (count > 0);
|
} while (count > 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<DstType dstType, SkShader::TileMode tileMode>
|
template<DstType dstType, ApplyPremul premul, SkShader::TileMode tileMode>
|
||||||
class SkGradientShaderBase::GradientShaderBase4fContext::TSampler {
|
class SkGradientShaderBase::GradientShaderBase4fContext::TSampler {
|
||||||
public:
|
public:
|
||||||
TSampler(const GradientShaderBase4fContext& ctx)
|
TSampler(const GradientShaderBase4fContext& ctx)
|
||||||
@ -424,8 +424,8 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
void loadIntervalData(const Interval* i) {
|
void loadIntervalData(const Interval* i) {
|
||||||
fCc = DstTraits<dstType>::load(i->fC0);
|
fCc = DstTraits<dstType, premul>::load(i->fC0);
|
||||||
fDc = DstTraits<dstType>::load(i->fDc);
|
fDc = DstTraits<dstType, premul>::load(i->fDc);
|
||||||
}
|
}
|
||||||
|
|
||||||
const Interval* fFirstInterval;
|
const Interval* fFirstInterval;
|
||||||
|
@ -60,7 +60,7 @@ private:
|
|||||||
void addMirrorIntervals(const SkGradientShaderBase&,
|
void addMirrorIntervals(const SkGradientShaderBase&,
|
||||||
const Sk4f& componentScale, bool reverse);
|
const Sk4f& componentScale, bool reverse);
|
||||||
|
|
||||||
template<DstType, SkShader::TileMode tileMode>
|
template<DstType, ApplyPremul, SkShader::TileMode tileMode>
|
||||||
class TSampler;
|
class TSampler;
|
||||||
|
|
||||||
template <DstType dstType, ApplyPremul premul>
|
template <DstType dstType, ApplyPremul premul>
|
||||||
|
@ -29,17 +29,6 @@ enum class DstType {
|
|||||||
F32, // Linear float. Used for shaders only.
|
F32, // Linear float. Used for shaders only.
|
||||||
};
|
};
|
||||||
|
|
||||||
template <ApplyPremul premul>
|
|
||||||
inline SkPMColor trunc_from_4f_255(const Sk4f& c) {
|
|
||||||
SkPMColor pmc;
|
|
||||||
SkNx_cast<uint8_t>(c).store(&pmc);
|
|
||||||
if (premul == ApplyPremul::True) {
|
|
||||||
pmc = SkPreMultiplyARGB(SkGetPackedA32(pmc), SkGetPackedR32(pmc),
|
|
||||||
SkGetPackedG32(pmc), SkGetPackedB32(pmc));
|
|
||||||
}
|
|
||||||
return pmc;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <ApplyPremul>
|
template <ApplyPremul>
|
||||||
struct PremulTraits;
|
struct PremulTraits;
|
||||||
|
|
||||||
@ -69,24 +58,34 @@ struct PremulTraits<ApplyPremul::True> {
|
|||||||
//
|
//
|
||||||
// - store4x() Store 4 Sk4f values to dest (opportunistic optimization).
|
// - store4x() Store 4 Sk4f values to dest (opportunistic optimization).
|
||||||
//
|
//
|
||||||
template <DstType, ApplyPremul premul = ApplyPremul::False>
|
template <DstType, ApplyPremul premul>
|
||||||
struct DstTraits;
|
struct DstTraits;
|
||||||
|
|
||||||
template <ApplyPremul premul>
|
template <ApplyPremul premul>
|
||||||
struct DstTraits<DstType::L32, premul> {
|
struct DstTraits<DstType::L32, premul> {
|
||||||
|
using PM = PremulTraits<premul>;
|
||||||
using Type = SkPMColor;
|
using Type = SkPMColor;
|
||||||
|
|
||||||
// For L32, we prescale the values by 255 to save a per-pixel multiplication.
|
// For L32, prescaling by 255 saves a per-pixel multiplication when premul is not needed.
|
||||||
static Sk4f load(const SkPM4f& c) {
|
static Sk4f load(const SkPM4f& c) {
|
||||||
return c.to4f_pmorder() * Sk4f(255);
|
return premul == ApplyPremul::False
|
||||||
|
? c.to4f_pmorder() * Sk4f(255)
|
||||||
|
: c.to4f_pmorder();
|
||||||
}
|
}
|
||||||
|
|
||||||
static void store(const Sk4f& c, Type* dst) {
|
static void store(const Sk4f& c, Type* dst) {
|
||||||
*dst = trunc_from_4f_255<premul>(c);
|
if (premul == ApplyPremul::False) {
|
||||||
|
// c is prescaled by 255, just store.
|
||||||
|
SkNx_cast<uint8_t>(c).store(dst);
|
||||||
|
} else {
|
||||||
|
*dst = Sk4f_toL32(PM::apply(c));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void store(const Sk4f& c, Type* dst, int n) {
|
static void store(const Sk4f& c, Type* dst, int n) {
|
||||||
sk_memset32(dst, trunc_from_4f_255<premul>(c), n);
|
Type pmc;
|
||||||
|
store(c, &pmc);
|
||||||
|
sk_memset32(dst, pmc, n);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void store4x(const Sk4f& c0, const Sk4f& c1,
|
static void store4x(const Sk4f& c0, const Sk4f& c1,
|
||||||
|
@ -240,7 +240,7 @@ LinearGradient4fContext::shadeSpanInternal(int x, int y,
|
|||||||
&pt);
|
&pt);
|
||||||
const SkScalar fx = pinFx<tileMode>(pt.x());
|
const SkScalar fx = pinFx<tileMode>(pt.x());
|
||||||
const SkScalar dx = fDstToPos.getScaleX();
|
const SkScalar dx = fDstToPos.getScaleX();
|
||||||
LinearIntervalProcessor<dstType, tileMode> proc(fIntervals.begin(),
|
LinearIntervalProcessor<dstType, premul, tileMode> proc(fIntervals.begin(),
|
||||||
fIntervals.end() - 1,
|
fIntervals.end() - 1,
|
||||||
this->findInterval(fx),
|
this->findInterval(fx),
|
||||||
fx,
|
fx,
|
||||||
@ -274,7 +274,7 @@ LinearGradient4fContext::shadeSpanInternal(int x, int y,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<DstType dstType, SkShader::TileMode tileMode>
|
template<DstType dstType, ApplyPremul premul, SkShader::TileMode tileMode>
|
||||||
class SkLinearGradient::
|
class SkLinearGradient::
|
||||||
LinearGradient4fContext::LinearIntervalProcessor {
|
LinearGradient4fContext::LinearIntervalProcessor {
|
||||||
public:
|
public:
|
||||||
@ -322,8 +322,8 @@ public:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
void compute_interval_props(SkScalar t) {
|
void compute_interval_props(SkScalar t) {
|
||||||
const Sk4f dC = DstTraits<dstType>::load(fInterval->fDc);
|
const Sk4f dC = DstTraits<dstType, premul>::load(fInterval->fDc);
|
||||||
fCc = DstTraits<dstType>::load(fInterval->fC0);
|
fCc = DstTraits<dstType, premul>::load(fInterval->fC0);
|
||||||
fCc = fCc + dC * Sk4f(t);
|
fCc = fCc + dC * Sk4f(t);
|
||||||
fDcDx = dC * fDx;
|
fDcDx = dC * fDx;
|
||||||
fZeroRamp = fIsVertical || fInterval->isZeroRamp();
|
fZeroRamp = fIsVertical || fInterval->isZeroRamp();
|
||||||
|
@ -27,7 +27,7 @@ protected:
|
|||||||
private:
|
private:
|
||||||
using INHERITED = GradientShaderBase4fContext;
|
using INHERITED = GradientShaderBase4fContext;
|
||||||
|
|
||||||
template<DstType, TileMode>
|
template<DstType, ApplyPremul, TileMode>
|
||||||
class LinearIntervalProcessor;
|
class LinearIntervalProcessor;
|
||||||
|
|
||||||
template <DstType dstType, ApplyPremul premul>
|
template <DstType dstType, ApplyPremul premul>
|
||||||
|
Loading…
Reference in New Issue
Block a user