Add Matrix colorfilter pipeline stages.

This breaks the color filter down into a couple logical steps:
  - go to unpremul
  - apply the 4x5 matrix
  - clamp to [0,1]
  - go to premul

Because we already have handy premul clamp stages, we swap the order of clamp and premul.  This is lossless.

While adding our stages to the pipeline, we analyze the matrix to see if we can skip any steps:
  - we can skip unpremul if the shader is opaque (alphas are all 1, so we're already unpremul);
  - we can skip the premul back if the color filter always produces opaque output (here we check whether the inputs are opaque and whether we keep them that way, but we could also check for an explicit 0 0 0 0 1 alpha row);
  - we can skip the clamp_0 if the matrix can never produce a value less than 0;
  - we can skip the clamp_1 if the matrix can never produce a value greater than 1.

The only thing that should seem missing is per-pixel alpha checks.  We don't do those here, but instead make up for it by operating on 4-8 pixels at a time.
We don't split the 4x5 matrix into a 4x4 and 1x4 translate.  We could, but when we have FMA (new x86, all ARMv8) we might as well work the translate for free into the FMAs.

This makes gm/fadefilter.cpp draw differently in sRGB and F16 modes, bringing them in line with the GPU sRGB and GPU f16 configs.  It's unclear to me what was wrong with the old CPU implementation.

GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=4346
CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot

Change-Id: I14082ded8fb8d63354167d9e6b3f8058f840253e
Reviewed-on: https://skia-review.googlesource.com/4346
Reviewed-by: Mike Reed <reed@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
This commit is contained in:
Mike Klein 2016-11-03 10:20:35 -04:00 committed by Skia Commit-Bot
parent 70ac8a9d09
commit eea7c16d59
11 changed files with 75 additions and 18 deletions

View File

@ -72,7 +72,7 @@ public:
virtual void filterSpan4f(const SkPM4f src[], int count, SkPM4f result[]) const;
bool appendStages(SkRasterPipeline*) const;
bool appendStages(SkRasterPipeline*, bool shaderIsOpaque) const;
enum Flags {
/** If set the filter methods will not change the alpha channel of the colors.
@ -163,7 +163,7 @@ public:
protected:
SkColorFilter() {}
virtual bool onAppendStages(SkRasterPipeline*) const;
virtual bool onAppendStages(SkRasterPipeline*, bool shaderIsOpaque) const;
private:
/*

View File

@ -42,7 +42,7 @@ protected:
private:
SkLumaColorFilter();
bool onAppendStages(SkRasterPipeline*) const override;
bool onAppendStages(SkRasterPipeline*, bool shaderIsOpaque) const override;
typedef SkColorFilter INHERITED;
};

View File

@ -37,11 +37,11 @@ sk_sp<GrFragmentProcessor> SkColorFilter::asFragmentProcessor(GrContext*, SkColo
}
#endif
bool SkColorFilter::appendStages(SkRasterPipeline* pipeline) const {
return this->onAppendStages(pipeline);
// Public entry point for adding this filter's stages to a raster pipeline.
// Simply forwards both arguments to the virtual onAppendStages() hook and
// returns whatever the hook returns.
bool SkColorFilter::appendStages(SkRasterPipeline* pipeline, bool shaderIsOpaque) const {
return this->onAppendStages(pipeline, shaderIsOpaque);
}
bool SkColorFilter::onAppendStages(SkRasterPipeline*) const {
// Base-class default for the onAppendStages() hook: appends nothing to the
// pipeline, ignores shaderIsOpaque, and returns false.
bool SkColorFilter::onAppendStages(SkRasterPipeline*, bool shaderIsOpaque) const {
return false;
}

View File

@ -9,6 +9,7 @@
#include "SkColorPriv.h"
#include "SkNx.h"
#include "SkPM4fPriv.h"
#include "SkRasterPipeline.h"
#include "SkReadBuffer.h"
#include "SkRefCnt.h"
#include "SkString.h"
@ -230,6 +231,30 @@ static void set_concat(SkScalar result[20], const SkScalar outer[20], const SkSc
// End duplication
//////
// Appends the raster-pipeline stages that apply this 4x5 color matrix.
//
// Stage order is [unpremul] -> matrix_4x5 -> [premul] -> [clamp_0] -> [clamp_a];
// the bracketed stages are only appended when the checks below say they are needed.
// Always returns true.
//
// NOTE(review): clamping happens after premul.  If the matrix can push alpha below 0
// or above 1, premul-then-clamp does not look equivalent to clamp-then-premul (e.g. a
// negative color times a negative alpha becomes positive before clamp_0 runs) --
// confirm whether that case matters to callers.
bool SkColorMatrixFilterRowMajor255::onAppendStages(SkRasterPipeline* p,
                                                    bool shaderIsOpaque) const {
    // Opaque input stays opaque only if the filter is flagged as leaving alpha unchanged.
    bool willStayOpaque = shaderIsOpaque && (fFlags & kAlphaUnchanged_Flag);

    // For each output channel, bound the result assuming every input lies in [0,1]:
    // negative coefficients can only lower the result, non-negative ones only raise it.
    bool needsClamp0 = false;
    bool needsClamp1 = false;
    for (int channel = 0; channel < 4; ++channel) {
        // Both bounds start from the translate term stored at offset 16.
        SkScalar lowest  = fTranspose[channel + 16];
        SkScalar highest = fTranspose[channel + 16];
        // Walk the four coefficients feeding this channel (offsets 0, 4, 8, 12).
        for (int offset = 0; offset < 16; offset += 4) {
            const float coeff = fTranspose[channel + offset];
            if (coeff < 0) {
                lowest += coeff;
            } else {
                highest += coeff;
            }
        }
        if (lowest < 0) {
            needsClamp0 = true;
        }
        if (highest > 1) {
            needsClamp1 = true;
        }
    }

    if (!shaderIsOpaque) {
        p->append(SkRasterPipeline::unpremul);
    }
    p->append(SkRasterPipeline::matrix_4x5, fTranspose);
    if (!willStayOpaque) {
        p->append(SkRasterPipeline::premul);
    }
    if (needsClamp0) {
        p->append(SkRasterPipeline::clamp_0);
    }
    if (needsClamp1) {
        p->append(SkRasterPipeline::clamp_a);
    }
    return true;
}
sk_sp<SkColorFilter>
SkColorMatrixFilterRowMajor255::makeComposed(sk_sp<SkColorFilter> innerFilter) const {
SkScalar innerMatrix[20];

View File

@ -36,6 +36,8 @@ protected:
void flatten(SkWriteBuffer&) const override;
private:
bool onAppendStages(SkRasterPipeline*, bool shaderIsOpaque) const override;
SkScalar fMatrix[20];
float fTranspose[20]; // for Sk4s
uint32_t fFlags;

View File

@ -85,7 +85,7 @@ sk_sp<SkFlattenable> SkModeColorFilter::CreateProc(SkReadBuffer& buffer) {
return SkColorFilter::MakeModeFilter(color, mode);
}
bool SkModeColorFilter::onAppendStages(SkRasterPipeline* p) const {
bool SkModeColorFilter::onAppendStages(SkRasterPipeline* p, bool shaderIsOpaque) const {
// TODO: For some modes we can cut a stage by loading the fPM4f into dr,dg,db,da
// and applying the opposite xfermode, e.g. dst-in instead of src-in.
p->append(SkRasterPipeline::swap_src_dst);
@ -94,7 +94,7 @@ bool SkModeColorFilter::onAppendStages(SkRasterPipeline* p) const {
if (!SkBlendMode_AppendStages(mode, p)) {
return false;
}
if (SkBlendMode_CanOverflow(mode)) { p->append(SkRasterPipeline::clamp_1); }
if (SkBlendMode_CanOverflow(mode)) { p->append(SkRasterPipeline::clamp_a); }
return true;
}

View File

@ -45,7 +45,7 @@ protected:
void flatten(SkWriteBuffer&) const override;
bool onAppendStages(SkRasterPipeline*) const override;
bool onAppendStages(SkRasterPipeline*, bool shaderIsOpaque) const override;
private:
SkColor fColor;

View File

@ -55,7 +55,8 @@
// the Stage*. This mostly matters on 64-bit Windows where every register is precious.
#define SK_RASTER_PIPELINE_STAGES(M) \
M(swap_src_dst) M(constant_color) M(clamp_1) \
M(swap_src_dst) M(clamp_0) M(clamp_a) M(unpremul) M(premul) \
M(constant_color) \
M(load_s_565) M(load_d_565) M(store_565) \
M(load_s_srgb) M(load_d_srgb) M(store_srgb) \
M(load_s_f16) M(load_d_f16) M(store_f16) \
@ -67,7 +68,7 @@
M(clear) M(modulate) M(multiply) M(plus_) M(screen) M(xor_) \
M(colorburn) M(colordodge) M(darken) M(difference) \
M(exclusion) M(hardlight) M(lighten) M(overlay) M(softlight) \
M(luminance_to_alpha)
M(luminance_to_alpha) M(matrix_4x5)
class SkRasterPipeline {
public:

View File

@ -99,12 +99,18 @@ SkBlitter* SkRasterPipelineBlitter::Create(const SkPixmap& dst,
return nullptr; // TODO
}
uint32_t paintColor = paint.getColor();
bool shaderIsOpaque = (paintColor >> 24) == 0xff;
SkRasterPipeline shader, colorFilter;
if (paint.getColorFilter() && !paint.getColorFilter()->appendStages(&colorFilter)) {
if (paint.getShader()) {
shaderIsOpaque = paint.getShader()->isOpaque();
}
if ( paint.getColorFilter() &&
!paint.getColorFilter()->appendStages(&colorFilter, shaderIsOpaque)) {
return nullptr;
}
uint32_t paintColor = paint.getColor();
SkColor4f color;
if (dst.info().colorSpace()) {
@ -166,7 +172,7 @@ void SkRasterPipelineBlitter::append_blend(SkRasterPipeline* p) const {
}
void SkRasterPipelineBlitter::maybe_clamp(SkRasterPipeline* p) const {
if (SkBlendMode_CanOverflow(fBlend)) { p->append(SkRasterPipeline::clamp_1); }
if (SkBlendMode_CanOverflow(fBlend)) { p->append(SkRasterPipeline::clamp_a); }
}
void SkRasterPipelineBlitter::blitH(int x, int y, int w) {

View File

@ -38,7 +38,7 @@ void SkLumaColorFilter::filterSpan(const SkPMColor src[], int count,
}
}
bool SkLumaColorFilter::onAppendStages(SkRasterPipeline* p) const {
// Appends the single luminance_to_alpha stage to the pipeline and returns true.
// shaderIsOpaque is accepted to match the base-class hook but is not consulted.
bool SkLumaColorFilter::onAppendStages(SkRasterPipeline* p, bool shaderIsOpaque) const {
p->append(SkRasterPipeline::luminance_to_alpha);
return true;
}

View File

@ -193,22 +193,32 @@ SI SkNh to_565(const SkNf& r, const SkNf& g, const SkNf& b) {
STAGE(just_return, false) { }
/* We don't seem to have a need for this yet.
// Clamps all four channels (a, r, g, b) from below at 0.
STAGE(clamp_0, true) {
a = SkNf::Max(a, 0.0f);
r = SkNf::Max(r, 0.0f);
g = SkNf::Max(g, 0.0f);
b = SkNf::Max(b, 0.0f);
}
*/
STAGE(clamp_1, true) {
// Clamps from above for premultiplied color: alpha is limited to 1 first, then
// each color channel is limited to that already-clamped alpha.
STAGE(clamp_a, true) {
a = SkNf::Min(a, 1.0f);
r = SkNf::Min(r, a);
g = SkNf::Min(g, a);
b = SkNf::Min(b, a);
}
// Converts premultiplied color to unpremultiplied by scaling r, g, b by 1/a.
// Alpha itself is left untouched.
//
// Lanes with a == 0 get a scale of 0 rather than 1/0: a premultiplied color
// channel times an infinite reciprocal would be 0 * inf = NaN, and a following
// matrix_4x5 stage would then spread that NaN to every channel.
STAGE(unpremul, true) {
    auto scale = (a == 0.0f).thenElse(0.0f, a.invert());
    r *= scale;
    g *= scale;
    b *= scale;
}
// Converts unpremultiplied color to premultiplied: each of r, g, b is scaled
// by alpha.  Alpha itself is left untouched.
STAGE(premul, true) {
    r = r * a;
    g = g * a;
    b = b * a;
}
STAGE(swap_src_dst, true) {
SkTSwap(r,dr);
SkTSwap(g,dg);
@ -450,6 +460,19 @@ STAGE(luminance_to_alpha, true) {
r = g = b = 0;
}
// Applies a 4x5 color matrix to (r, g, b, a).
//
// ctx points at 20 floats: output channel i reads its r, g, b, a coefficients from
// m[i], m[i+4], m[i+8], m[i+12] and its translate term from m[i+16].  The translate
// is folded into the innermost fused multiply-add.
STAGE(matrix_4x5, true) {
    auto m = (const float*)ctx;

    // Wrapper so the scalar matrix entries convert to SkNf at the call site.
    auto fma = [](const SkNf& f, const SkNf& m, const SkNf& a) { return SkNx_fma(f,m,a); };

    // One output channel: translate first, then a, b, g, r accumulated outward.
    auto channel = [&](int i) {
        return fma(r,m[i], fma(g,m[i+4], fma(b,m[i+8], fma(a,m[i+12], m[i+16]))));
    };

    // Compute every output from the original inputs before overwriting any of them.
    auto outR = channel(0);
    auto outG = channel(1);
    auto outB = channel(2);
    auto outA = channel(3);

    r = outR;
    g = outG;
    b = outB;
    a = outA;
}
template <typename Fn>
SI Fn enum_to_Fn(SkRasterPipeline::StockStage st) {