Rework out-of-gamut handling in SkRasterPipeline

Instead of trying to carefully manage the in-gamut / out-of-gamut state
of the pipeline, let's do what a GPU would do, clamping to representable
range in any float -> integer conversion.

Most effects doing table lookups now clamp themselves internally, and
the store_foo() methods clamp when the destination is fixed point.  In
turn, the from_srgb() conversions and all future transfer function stages
no longer need to care about this stuff.
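
In code terms, the approach amounts to clamping at the moment of conversion.
A minimal scalar sketch of the idea (hypothetical names, not the real SIMD
stages; the actual helper this change adds is to_unorm() in the SkJumper
stages below):

    #include <algorithm>
    #include <cstdint>

    // Sketch of a GPU-style fixed-point store: every float -> integer
    // conversion clamps first, so earlier stages needn't track gamut state.
    static uint8_t to_unorm8(float v) {
        v = std::min(std::max(v, 0.0f), 1.0f);   // out-of-gamut values clamp here
        return (uint8_t)(v * 255.0f + 0.5f);     // then round to nearest
    }

    static uint32_t store_8888_px(float r, float g, float b, float a) {
        return  (uint32_t)to_unorm8(r)
             | ((uint32_t)to_unorm8(g) <<  8)
             | ((uint32_t)to_unorm8(b) << 16)
             | ((uint32_t)to_unorm8(a) << 24);
    }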

If I'm thinking right, the _lowp side of things need not change at all,
and that will soften the performance impact of this change.  Anything
that was fast to begin with was probably running a _lowp pipeline.

Bug: skia:7419

Change-Id: Id2e080ac240a97b900a1ac131c85d9e15f70af32
Reviewed-on: https://skia-review.googlesource.com/85740
Commit-Queue: Mike Klein <mtklein@chromium.org>
Reviewed-by: Brian Osman <brianosman@google.com>
Author: Mike Klein, 2017-12-15 09:55:03 -05:00 (committed by Skia Commit-Bot)
parent 5b92ce1b24
commit 37155d476c
8 changed files with 14924 additions and 13709 deletions


@@ -18,7 +18,6 @@ void SkRasterPipeline::reset() {
fStages = nullptr;
fNumStages = 0;
fSlotsNeeded = 1; // We always need one extra slot for just_return().
fClamped = true;
}
void SkRasterPipeline::append(StockStage stage, void* ctx) {
@@ -52,7 +51,6 @@ void SkRasterPipeline::extend(const SkRasterPipeline& src) {
fStages = &stages[src.fNumStages - 1];
fNumStages += src.fNumStages;
fSlotsNeeded += src.fSlotsNeeded - 1; // Don't double count just_returns().
fClamped = fClamped && src.fClamped;
}
void SkRasterPipeline::dump() const {
@@ -125,26 +123,13 @@ void SkRasterPipeline::append_constant_color(SkArenaAlloc* alloc, const float rg
#undef INC_WHITE
#undef INC_COLOR
// It's pretty easy to start with sound premultiplied linear floats, pack those
// to sRGB encoded bytes, then read them back to linear floats and find them not
// quite premultiplied, with a color channel just a smidge greater than the alpha
// channel. This can happen basically any time we have different transfer
// functions for alpha and colors... sRGB being the only one we draw into.
// This is an annoying problem with no known good solution. So apply the clamp hammer.
void SkRasterPipeline::append_from_srgb(SkAlphaType at) {
// TODO: we used to clamp to [0,a] here if at == kPremul, but don't anymore.
// These should no longer need to be special append() methods.
void SkRasterPipeline::append_from_srgb(SkAlphaType) {
this->unchecked_append(from_srgb, nullptr);
if (at == kPremul_SkAlphaType) {
this->append(SkRasterPipeline::clamp_a);
}
}
void SkRasterPipeline::append_from_srgb_dst(SkAlphaType at) {
void SkRasterPipeline::append_from_srgb_dst(SkAlphaType) {
this->unchecked_append(from_srgb_dst, nullptr);
if (at == kPremul_SkAlphaType) {
this->append(SkRasterPipeline::clamp_a_dst);
}
}
//static int gCounts[5] = { 0, 0, 0, 0, 0 };
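
For a concrete feel for the round-trip problem the removed comment above
describes, here is a standalone sketch (plain C++ with the standard piecewise
sRGB transfer functions, not Skia's actual from_srgb/to_srgb approximations):
color channels are sRGB-encoded while alpha is stored linearly, so their
quantization errors differ and a color channel can come back a smidge above
alpha.

    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    // Standard sRGB transfer functions (per the sRGB spec).
    static float srgb_encode(float linear) {
        return linear <= 0.0031308f ? 12.92f * linear
                                    : 1.055f * std::pow(linear, 1.0f/2.4f) - 0.055f;
    }
    static float srgb_decode(float encoded) {
        return encoded <= 0.04045f ? encoded / 12.92f
                                   : std::pow((encoded + 0.055f) / 1.055f, 2.4f);
    }

    int main() {
        float r = 0.5f, a = 0.5f;                               // soundly premultiplied: r == a

        uint8_t r8 = (uint8_t)(srgb_encode(r) * 255.0f + 0.5f); // 188: color is sRGB-encoded
        uint8_t a8 = (uint8_t)(a * 255.0f + 0.5f);              // 128: alpha stays linear

        float r2 = srgb_decode(r8 / 255.0f);                    // ~0.5029
        float a2 = a8 / 255.0f;                                 // ~0.5020

        printf("r=%f a=%f, premultiplied? %s\n", r2, a2, r2 <= a2 ? "yes" : "no");
        // r2 > a2: no longer strictly premultiplied, which is what the old
        // clamp_a append used to paper over.
    }
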
@@ -189,15 +174,6 @@ void SkRasterPipeline::append_matrix(SkArenaAlloc* alloc, const SkMatrix& matrix
}
}
void SkRasterPipeline::clamp_if_unclamped(SkAlphaType alphaType) {
if (!fClamped) {
this->append(SkRasterPipeline::clamp_0);
this->append(alphaType == kPremul_SkAlphaType ? SkRasterPipeline::clamp_a
: SkRasterPipeline::clamp_1);
fClamped = true;
}
}
void SkRasterPipeline::append_seed_shader() {
static const float iota[] = {
0.5f, 1.5f, 2.5f, 3.5f, 4.5f, 5.5f, 6.5f, 7.5f,


@@ -150,11 +150,6 @@ public:
bool empty() const { return fStages == nullptr; }
// Used to track if we're handling values outside [0.0f, 1.0f],
// and to clamp back to [0.0f, 1.0f] if so.
void set_clamped(bool clamped) { fClamped = clamped; }
void clamp_if_unclamped(SkAlphaType);
private:
struct StageList {
StageList* prev;
@@ -169,7 +164,6 @@ private:
StageList* fStages;
int fNumStages;
int fSlotsNeeded;
bool fClamped;
};
template <size_t bytes>


@@ -258,10 +258,6 @@ void SkRasterPipelineBlitter::append_store(SkRasterPipeline* p) const {
p->append(SkRasterPipeline::dither, &fDitherRate);
}
if (fDst.info().colorType() != kRGBA_F16_SkColorType) {
p->clamp_if_unclamped(kPremul_SkAlphaType);
}
switch (fDst.info().colorType()) {
case kGray_8_SkColorType: p->append(SkRasterPipeline::luminance_to_alpha); // fallthru
case kAlpha_8_SkColorType: p->append(SkRasterPipeline::store_a8, &fDstPtr); break;
@@ -311,7 +307,6 @@ void SkRasterPipelineBlitter::blitRect(int x, int y, int w, int h) {
&& !fDst.colorSpace()
&& fDst.info().alphaType() != kUnpremul_SkAlphaType
&& fDitherRate == 0.0f) {
p.clamp_if_unclamped(kPremul_SkAlphaType);
auto stage = fDst.info().colorType() == kRGBA_8888_SkColorType
? SkRasterPipeline::srcover_rgba_8888
: SkRasterPipeline::srcover_bgra_8888;


@@ -113,13 +113,6 @@ public:
if (fFlags & kG_Flag) { g = ptr; ptr += 256; }
if (fFlags & kB_Flag) { b = ptr; }
// If our inputs are out of range, we'd attempt to read values outside our tables.
// We could finesse this with p->clamp_if_unclamped(kPremul_SkAlphaType) here, but
// this filter is already slow enough that I'd rather just be paranoid and safe.
p->append(SkRasterPipeline::clamp_0);
p->append(SkRasterPipeline::clamp_a);
p->set_clamped(true);
if (!shaderIsOpaque) {
p->append(SkRasterPipeline::unpremul);
}


@@ -40,17 +40,12 @@ void SkToSRGBColorFilter::onAppendStages(SkRasterPipeline* p,
}
// Step 2: Transform to sRGB gamut, without clamping.
// TODO: because...
float* gamut_transform = alloc->makeArrayDefault<float>(12);
if (append_gamut_transform_noclamp(p,
gamut_transform,
fSrcColorSpace.get(),
SkColorSpace::MakeSRGB().get())) {
bool needs_clamp_0, needs_clamp_1;
analyze_3x4_matrix(gamut_transform, &needs_clamp_0, &needs_clamp_1);
if (needs_clamp_0 || needs_clamp_1) {
p->set_clamped(false);
}
}
(void)append_gamut_transform_noclamp(p,
gamut_transform,
fSrcColorSpace.get(),
SkColorSpace::MakeSRGB().get());
// Step 3: Back to sRGB encoding.
p->append(SkRasterPipeline::to_srgb);

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -208,6 +208,19 @@ SI U32 ix_and_ptr(T** ptr, const SkJumper_GatherCtx* ctx, F x, F y) {
return trunc_(y)*ctx->stride + trunc_(x);
}
// We often have a nominally [0,1] float value we need to scale and convert to an integer,
// whether for a table lookup or to pack back down into bytes for storage.
//
// In practice, especially when dealing with interesting color spaces, that notionally
// [0,1] float may be out of [0,1] range. Unorms cannot represent that, so we must clamp.
//
// You can adjust the expected input to [0,bias] by tweaking that parameter.
SI U32 to_unorm(F v, F scale, F bias = 1.0f) {
// TODO: platform-specific implementations of to_unorm(), removing round() entirely?
// Any time we use round() we probably want to use to_unorm().
return round(min(max(0, v), bias), scale);
}
// Now finally, normal Stages!
STAGE(seed_shader, const float* iota) {
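
To see what the clamp buys at these conversion points, here is a scalar
sketch of the two helpers (hypothetical stand-ins for the SIMD round() and
to_unorm() above) fed an out-of-gamut value:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // Scalar models of the vector helpers, for illustration only.
    static uint32_t round_(float v, float scale) { return (uint32_t)(v*scale + 0.5f); }
    static uint32_t to_unorm(float v, float scale, float bias = 1.0f) {
        return round_(std::min(std::max(0.0f, v), bias), scale);
    }

    int main() {
        float r = 1.25f;                    // an out-of-gamut value reaching a store stage
        assert(round_(r, 255.0f) == 319);   // old: 0x13F spills into the next channel's byte
                                            //      once OR'd into px in store_8888
        assert(to_unorm(r, 255) == 255);    // new: clamp to [0,1] first, then convert
        assert(to_unorm(-0.1f, 255) == 0);  // negative values clamp to 0
    }
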
@@ -486,18 +499,19 @@ STAGE(srcover_rgba_8888, const SkJumper_MemoryCtx* ctx) {
db = cast((dst >> 16) & 0xff);
da = cast((dst >> 24) );
// {dr,dg,db,da} are in [0,255]
// { r, g, b, a} are in [0, 1]
// { r, g, b, a} are in [0, 1] (but may be out of gamut)
r = mad(dr, inv(a), r*255.0f);
g = mad(dg, inv(a), g*255.0f);
b = mad(db, inv(a), b*255.0f);
a = mad(da, inv(a), a*255.0f);
// { r, g, b, a} are now in [0,255]
// { r, g, b, a} are now in [0,255] (but may be out of gamut)
dst = round(r, 1.0f)
| round(g, 1.0f) << 8
| round(b, 1.0f) << 16
| round(a, 1.0f) << 24;
// to_unorm() clamps back to gamut. Scaling by 1 since we're already 255-biased.
dst = to_unorm(r, 1, 255)
| to_unorm(g, 1, 255) << 8
| to_unorm(b, 1, 255) << 16
| to_unorm(a, 1, 255) << 24;
store(ptr, dst, tail);
}
@@ -510,18 +524,19 @@ STAGE(srcover_bgra_8888, const SkJumper_MemoryCtx* ctx) {
dr = cast((dst >> 16) & 0xff);
da = cast((dst >> 24) );
// {dr,dg,db,da} are in [0,255]
// { r, g, b, a} are in [0, 1]
// { r, g, b, a} are in [0, 1] (but may be out of gamut)
r = mad(dr, inv(a), r*255.0f);
g = mad(dg, inv(a), g*255.0f);
b = mad(db, inv(a), b*255.0f);
a = mad(da, inv(a), a*255.0f);
// { r, g, b, a} are now in [0,255]
// { r, g, b, a} are now in [0,255] (but may be out of gamut)
dst = round(b, 1.0f)
| round(g, 1.0f) << 8
| round(r, 1.0f) << 16
| round(a, 1.0f) << 24;
// to_unorm() clamps back to gamut. Scaling by 1 since we're already 255-biased.
dst = to_unorm(b, 1, 255)
| to_unorm(g, 1, 255) << 8
| to_unorm(r, 1, 255) << 16
| to_unorm(a, 1, 255) << 24;
store(ptr, dst, tail);
}
@@ -796,24 +811,24 @@ STAGE(byte_tables, const void* ctx) { // TODO: rename Tables SkJumper_ByteTable
struct Tables { const uint8_t *r, *g, *b, *a; };
auto tables = (const Tables*)ctx;
r = from_byte(gather(tables->r, round(r, 255.0f)));
g = from_byte(gather(tables->g, round(g, 255.0f)));
b = from_byte(gather(tables->b, round(b, 255.0f)));
a = from_byte(gather(tables->a, round(a, 255.0f)));
r = from_byte(gather(tables->r, to_unorm(r, 255)));
g = from_byte(gather(tables->g, to_unorm(g, 255)));
b = from_byte(gather(tables->b, to_unorm(b, 255)));
a = from_byte(gather(tables->a, to_unorm(a, 255)));
}
STAGE(byte_tables_rgb, const void* ctx) { // TODO: rename Tables SkJumper_ByteTablesRGBCtx
struct Tables { const uint8_t *r, *g, *b; int n; };
auto tables = (const Tables*)ctx;
F scale = tables->n - 1;
r = from_byte(gather(tables->r, round(r, scale)));
g = from_byte(gather(tables->g, round(g, scale)));
b = from_byte(gather(tables->b, round(b, scale)));
int scale = tables->n - 1;
r = from_byte(gather(tables->r, to_unorm(r, scale)));
g = from_byte(gather(tables->g, to_unorm(g, scale)));
b = from_byte(gather(tables->b, to_unorm(b, scale)));
}
SI F table(F v, const SkJumper_TableCtx* ctx) {
return gather(ctx->table, round(v, ctx->size - 1));
return gather(ctx->table, to_unorm(v, ctx->size - 1));
}
STAGE(table_r, const SkJumper_TableCtx* ctx) { r = table(r, ctx); }
STAGE(table_g, const SkJumper_TableCtx* ctx) { g = table(g, ctx); }
@@ -881,7 +896,7 @@ STAGE(gather_a8, const SkJumper_GatherCtx* ctx) {
STAGE(store_a8, const SkJumper_MemoryCtx* ctx) {
auto ptr = ptr_at_xy<uint8_t>(ctx, dx,dy);
U8 packed = pack(pack(round(a, 255.0f)));
U8 packed = pack(pack(to_unorm(a, 255)));
store(ptr, packed, tail);
}
@@ -925,9 +940,9 @@ STAGE(gather_565, const SkJumper_GatherCtx* ctx) {
STAGE(store_565, const SkJumper_MemoryCtx* ctx) {
auto ptr = ptr_at_xy<uint16_t>(ctx, dx,dy);
U16 px = pack( round(r, 31.0f) << 11
| round(g, 63.0f) << 5
| round(b, 31.0f) );
U16 px = pack( to_unorm(r, 31) << 11
| to_unorm(g, 63) << 5
| to_unorm(b, 31) );
store(ptr, px, tail);
}
@@ -946,10 +961,10 @@ STAGE(gather_4444, const SkJumper_GatherCtx* ctx) {
}
STAGE(store_4444, const SkJumper_MemoryCtx* ctx) {
auto ptr = ptr_at_xy<uint16_t>(ctx, dx,dy);
U16 px = pack( round(r, 15.0f) << 12
| round(g, 15.0f) << 8
| round(b, 15.0f) << 4
| round(a, 15.0f) );
U16 px = pack( to_unorm(r, 15) << 12
| to_unorm(g, 15) << 8
| to_unorm(b, 15) << 4
| to_unorm(a, 15) );
store(ptr, px, tail);
}
@@ -969,10 +984,10 @@ STAGE(gather_8888, const SkJumper_GatherCtx* ctx) {
STAGE(store_8888, const SkJumper_MemoryCtx* ctx) {
auto ptr = ptr_at_xy<uint32_t>(ctx, dx,dy);
U32 px = round(r, 255.0f)
| round(g, 255.0f) << 8
| round(b, 255.0f) << 16
| round(a, 255.0f) << 24;
U32 px = to_unorm(r, 255)
| to_unorm(g, 255) << 8
| to_unorm(b, 255) << 16
| to_unorm(a, 255) << 24;
store(ptr, px, tail);
}
@@ -992,10 +1007,10 @@ STAGE(gather_bgra, const SkJumper_GatherCtx* ctx) {
STAGE(store_bgra, const SkJumper_MemoryCtx* ctx) {
auto ptr = ptr_at_xy<uint32_t>(ctx, dx,dy);
U32 px = round(b, 255.0f)
| round(g, 255.0f) << 8
| round(r, 255.0f) << 16
| round(a, 255.0f) << 24;
U32 px = to_unorm(b, 255)
| to_unorm(g, 255) << 8
| to_unorm(r, 255) << 16
| to_unorm(a, 255) << 24;
store(ptr, px, tail);
}
@@ -1064,10 +1079,10 @@ STAGE(store_u16_be, const SkJumper_MemoryCtx* ctx) {
STAGE(store_u16_be, const SkJumper_MemoryCtx* ctx) {
auto ptr = ptr_at_xy<uint16_t>(ctx, 4*dx,dy);
U16 R = bswap(pack(round(r, 65535.0f))),
G = bswap(pack(round(g, 65535.0f))),
B = bswap(pack(round(b, 65535.0f))),
A = bswap(pack(round(a, 65535.0f)));
U16 R = bswap(pack(to_unorm(r, 65535))),
G = bswap(pack(to_unorm(g, 65535))),
B = bswap(pack(to_unorm(b, 65535))),
A = bswap(pack(to_unorm(a, 65535)));
store4(ptr,tail, R,G,B,A);
}