Rework out-of-gamut handling in SkRasterPipeline
Instead of trying to carefully manage the in-gamut / out-of-gamut state of the
pipeline, let's do what a GPU would do, clamping to the representable range in
any float -> integer conversion.

Most effects doing table lookups now clamp themselves internally, and the
store_foo() methods clamp when the destination is fixed-point. In turn the
from_srgb() conversions and all future transfer-function stages need not care
about this stuff at all.

If I'm thinking right, the _lowp side of things need not change at all, which
should soften the performance impact of this change: anything that was fast to
begin with was probably running a _lowp pipeline.

Bug: skia:7419
Change-Id: Id2e080ac240a97b900a1ac131c85d9e15f70af32
Reviewed-on: https://skia-review.googlesource.com/85740
Commit-Queue: Mike Klein <mtklein@chromium.org>
Reviewed-by: Brian Osman <brianosman@google.com>
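The mechanism described above lands in src/jumper/SkJumper_stages.cpp below as a
small helper, to_unorm(). A scalar sketch of the idea (the real helper is SIMD
and builds on the existing round(); this standalone version assumes simple
round-half-up, close enough for illustration):

    #include <algorithm>
    #include <cstdint>

    // Clamp a nominally [0,bias] float to representable range, then scale and
    // round to an integer: the GPU-style "clamp at conversion" in one place.
    static uint32_t to_unorm(float v, float scale, float bias = 1.0f) {
        v = std::min(std::max(v, 0.0f), bias);  // out-of-gamut values clamp here...
        return (uint32_t)(v * scale + 0.5f);    // ...so the conversion cannot overflow.
    }

    // E.g. a gamut transform may leave r = 1.02f.  A store to 8888 still packs
    // safely: to_unorm(1.02f, 255) == 255, where round(1.02f * 255) == 260
    // would not fit in a byte.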
parent 5b92ce1b24
commit 37155d476c
src/core/SkRasterPipeline.cpp

@@ -18,7 +18,6 @@ void SkRasterPipeline::reset() {
     fStages      = nullptr;
     fNumStages   = 0;
     fSlotsNeeded = 1;  // We always need one extra slot for just_return().
-    fClamped     = true;
 }
 
 void SkRasterPipeline::append(StockStage stage, void* ctx) {
@@ -52,7 +51,6 @@ void SkRasterPipeline::extend(const SkRasterPipeline& src) {
     fStages = &stages[src.fNumStages - 1];
     fNumStages += src.fNumStages;
     fSlotsNeeded += src.fSlotsNeeded - 1;  // Don't double count just_returns().
-    fClamped = fClamped && src.fClamped;
 }
 
 void SkRasterPipeline::dump() const {
@@ -125,26 +123,13 @@ void SkRasterPipeline::append_constant_color(SkArenaAlloc* alloc, const float rg
 #undef INC_WHITE
 #undef INC_COLOR
 
-// It's pretty easy to start with sound premultiplied linear floats, pack those
-// to sRGB encoded bytes, then read them back to linear floats and find them not
-// quite premultiplied, with a color channel just a smidge greater than the alpha
-// channel.  This can happen basically any time we have different transfer
-// functions for alpha and colors... sRGB being the only one we draw into.
-
-// This is an annoying problem with no known good solution.  So apply the clamp hammer.
-
-void SkRasterPipeline::append_from_srgb(SkAlphaType at) {
+// TODO: we used to clamp to [0,a] here if at == kPremul, but don't anymore.
+// These should no longer need to be special append() methods.
+void SkRasterPipeline::append_from_srgb(SkAlphaType) {
     this->unchecked_append(from_srgb, nullptr);
-    if (at == kPremul_SkAlphaType) {
-        this->append(SkRasterPipeline::clamp_a);
-    }
 }
 
-void SkRasterPipeline::append_from_srgb_dst(SkAlphaType at) {
+void SkRasterPipeline::append_from_srgb_dst(SkAlphaType) {
     this->unchecked_append(from_srgb_dst, nullptr);
-    if (at == kPremul_SkAlphaType) {
-        this->append(SkRasterPipeline::clamp_a_dst);
-    }
 }
 
 //static int gCounts[5] = { 0, 0, 0, 0, 0 };
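The comment block removed above is easier to believe with numbers. A minimal
standalone check (plain C++, not Skia code; 0.5 sits on sRGB's power-law
branch, so the transfer function's linear segment is omitted):

    #include <cmath>
    #include <cstdio>

    static float   to_srgb(float l) { return 1.055f * std::pow(l, 1/2.4f) - 0.055f; }
    static float from_srgb(float s) { return std::pow((s + 0.055f) / 1.055f, 2.4f); }

    int main() {
        // A sound premultiplied pixel: color == alpha == 0.5.
        float r = 0.5f, a = 0.5f;
        int rb = (int)std::lround(to_srgb(r) * 255);  // color is sRGB-encoded: 188
        int ab = (int)std::lround(a * 255);           // alpha stays linear:    128
        float r2 = from_srgb(rb / 255.0f);            // ~0.5028
        float a2 = ab / 255.0f;                       // ~0.5020
        std::printf("r %.4f vs a %.4f\n", r2, a2);    // r > a: not premultiplied anymore
    }

One round trip and the color channel is a smidge greater than alpha, exactly
as the comment says; unpremultiplying would then produce r slightly above 1.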
@@ -189,15 +174,6 @@ void SkRasterPipeline::append_matrix(SkArenaAlloc* alloc, const SkMatrix& matrix
     }
 }
 
-void SkRasterPipeline::clamp_if_unclamped(SkAlphaType alphaType) {
-    if (!fClamped) {
-        this->append(SkRasterPipeline::clamp_0);
-        this->append(alphaType == kPremul_SkAlphaType ? SkRasterPipeline::clamp_a
-                                                      : SkRasterPipeline::clamp_1);
-        fClamped = true;
-    }
-}
-
 void SkRasterPipeline::append_seed_shader() {
     static const float iota[] = {
         0.5f, 1.5f, 2.5f, 3.5f, 4.5f, 5.5f, 6.5f, 7.5f,
src/core/SkRasterPipeline.h

@@ -150,11 +150,6 @@ public:
 
     bool empty() const { return fStages == nullptr; }
 
-    // Used to track if we're handling values outside [0.0f, 1.0f],
-    // and to clamp back to [0.0f, 1.0f] if so.
-    void set_clamped(bool clamped) { fClamped = clamped; }
-    void clamp_if_unclamped(SkAlphaType);
-
 private:
     struct StageList {
         StageList* prev;

@@ -169,7 +164,6 @@ private:
     StageList* fStages;
     int        fNumStages;
     int        fSlotsNeeded;
-    bool       fClamped;
 };
 
 template <size_t bytes>
src/core/SkRasterPipelineBlitter.cpp

@@ -258,10 +258,6 @@ void SkRasterPipelineBlitter::append_store(SkRasterPipeline* p) const {
         p->append(SkRasterPipeline::dither, &fDitherRate);
     }
 
-    if (fDst.info().colorType() != kRGBA_F16_SkColorType) {
-        p->clamp_if_unclamped(kPremul_SkAlphaType);
-    }
-
     switch (fDst.info().colorType()) {
         case kGray_8_SkColorType:   p->append(SkRasterPipeline::luminance_to_alpha); // fallthru
         case kAlpha_8_SkColorType:  p->append(SkRasterPipeline::store_a8, &fDstPtr); break;

@@ -311,7 +307,6 @@ void SkRasterPipelineBlitter::blitRect(int x, int y, int w, int h) {
         && !fDst.colorSpace()
         && fDst.info().alphaType() != kUnpremul_SkAlphaType
         && fDitherRate == 0.0f) {
-        p.clamp_if_unclamped(kPremul_SkAlphaType);
         auto stage = fDst.info().colorType() == kRGBA_8888_SkColorType
                          ? SkRasterPipeline::srcover_rgba_8888
                          : SkRasterPipeline::srcover_bgra_8888;
src/effects/SkTableColorFilter.cpp

@@ -113,13 +113,6 @@ public:
         if (fFlags & kG_Flag) { g = ptr; ptr += 256; }
         if (fFlags & kB_Flag) { b = ptr; }
 
-        // If our inputs are out of range, we'd attempt to read values outside our tables.
-        // We could finesse this with p->clamp_if_unclamped(kPremul_SkAlphaType) here, but
-        // this filter is already slow enough that I'd rather just be paranoid and safe.
-        p->append(SkRasterPipeline::clamp_0);
-        p->append(SkRasterPipeline::clamp_a);
-        p->set_clamped(true);
-
         if (!shaderIsOpaque) {
             p->append(SkRasterPipeline::unpremul);
         }
src/effects/SkToSRGBColorFilter.cpp

@@ -40,17 +40,12 @@ void SkToSRGBColorFilter::onAppendStages(SkRasterPipeline* p,
     }
 
     // Step 2: Transform to sRGB gamut, without clamping.
+    // TODO: because...
     float* gamut_transform = alloc->makeArrayDefault<float>(12);
-    if (append_gamut_transform_noclamp(p,
-                                       gamut_transform,
-                                       fSrcColorSpace.get(),
-                                       SkColorSpace::MakeSRGB().get())) {
-        bool needs_clamp_0, needs_clamp_1;
-        analyze_3x4_matrix(gamut_transform, &needs_clamp_0, &needs_clamp_1);
-        if (needs_clamp_0 || needs_clamp_1) {
-            p->set_clamped(false);
-        }
-    }
+    (void)append_gamut_transform_noclamp(p,
+                                         gamut_transform,
+                                         fSrcColorSpace.get(),
+                                         SkColorSpace::MakeSRGB().get());
 
     // Step 3: Back to sRGB encoding.
     p->append(SkRasterPipeline::to_srgb);
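Dropping the analyze_3x4_matrix() bookkeeping is safe under the new scheme: if
the gamut transform pushes a channel outside [0,1] (a saturated wide-gamut red
landing in sRGB with negative green and blue, say), the value simply rides
along until the final float to integer conversion clamps it.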
[Two file diffs suppressed because they are too large.]
src/jumper/SkJumper_stages.cpp

@@ -208,6 +208,19 @@ SI U32 ix_and_ptr(T** ptr, const SkJumper_GatherCtx* ctx, F x, F y) {
     return trunc_(y)*ctx->stride + trunc_(x);
 }
 
+// We often have a nominally [0,1] float value we need to scale and convert to an integer,
+// whether for a table lookup or to pack back down into bytes for storage.
+//
+// In practice, especially when dealing with interesting color spaces, that notionally
+// [0,1] float may be out of [0,1] range.  Unorms cannot represent that, so we must clamp.
+//
+// You can adjust the expected input to [0,bias] by tweaking that parameter.
+SI U32 to_unorm(F v, F scale, F bias = 1.0f) {
+    // TODO: platform-specific implementations of to_unorm(), removing round() entirely?
+    // Any time we use round() we probably want to use to_unorm().
+    return round(min(max(0, v), bias), scale);
+}
+
 // Now finally, normal Stages!
 
 STAGE(seed_shader, const float* iota) {
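The table lookups are where the old scheme could actually read out of bounds:
byte_tables below used round(r, 255.0f) as an index into a 256-entry table, so
any r above 1.0 indexed past the end. A scalar illustration (hypothetical
table, clamp written out inline):

    #include <algorithm>
    #include <cstdint>

    static uint8_t table[256];  // stand-in for one of byte_tables' channel tables

    static uint8_t lookup(float r) {
        // Old: index = round(r * 255.0f); for r = 1.02f that's 260, past the end.
        // New, the scalar equivalent of to_unorm(r, 255):
        float v = std::min(std::max(r, 0.0f), 1.0f);  // clamp to [0,1] first
        uint32_t ix = (uint32_t)(v * 255.0f + 0.5f);  // ix <= 255, always in bounds
        return table[ix];
    }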
@@ -486,18 +499,19 @@ STAGE(srcover_rgba_8888, const SkJumper_MemoryCtx* ctx) {
     db = cast((dst >> 16) & 0xff);
     da = cast((dst >> 24)       );
     // {dr,dg,db,da} are in [0,255]
-    // { r, g, b, a} are in [0, 1]
+    // { r, g, b, a} are in [0, 1] (but may be out of gamut)
 
     r = mad(dr, inv(a), r*255.0f);
     g = mad(dg, inv(a), g*255.0f);
     b = mad(db, inv(a), b*255.0f);
     a = mad(da, inv(a), a*255.0f);
-    // { r, g, b, a} are now in [0,255]
+    // { r, g, b, a} are now in [0,255] (but may be out of gamut)
 
-    dst = round(r, 1.0f)
-        | round(g, 1.0f) <<  8
-        | round(b, 1.0f) << 16
-        | round(a, 1.0f) << 24;
+    // to_unorm() clamps back to gamut.  Scaling by 1 since we're already 255-biased.
+    dst = to_unorm(r, 1, 255)
+        | to_unorm(g, 1, 255) <<  8
+        | to_unorm(b, 1, 255) << 16
+        | to_unorm(a, 1, 255) << 24;
     store(ptr, dst, tail);
 }
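The bias parameter from the helper's comment is at work in this hunk: the
channels leave the mad()s already scaled into [0,255], so the conversion is
to_unorm(r, 1, 255), clamp to [0,255] then scale by 1, while a plain store
like store_8888 below uses to_unorm(r, 255) to clamp to [0,1] and scale by 255.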
@@ -510,18 +524,19 @@ STAGE(srcover_bgra_8888, const SkJumper_MemoryCtx* ctx) {
     dr = cast((dst >> 16) & 0xff);
     da = cast((dst >> 24)       );
     // {dr,dg,db,da} are in [0,255]
-    // { r, g, b, a} are in [0, 1]
+    // { r, g, b, a} are in [0, 1] (but may be out of gamut)
 
     r = mad(dr, inv(a), r*255.0f);
     g = mad(dg, inv(a), g*255.0f);
     b = mad(db, inv(a), b*255.0f);
     a = mad(da, inv(a), a*255.0f);
-    // { r, g, b, a} are now in [0,255]
+    // { r, g, b, a} are now in [0,255] (but may be out of gamut)
 
-    dst = round(b, 1.0f)
-        | round(g, 1.0f) <<  8
-        | round(r, 1.0f) << 16
-        | round(a, 1.0f) << 24;
+    // to_unorm() clamps back to gamut.  Scaling by 1 since we're already 255-biased.
+    dst = to_unorm(b, 1, 255)
+        | to_unorm(g, 1, 255) <<  8
+        | to_unorm(r, 1, 255) << 16
+        | to_unorm(a, 1, 255) << 24;
     store(ptr, dst, tail);
 }
@@ -796,24 +811,24 @@ STAGE(byte_tables, const void* ctx) { // TODO: rename Tables SkJumper_ByteTable
     struct Tables { const uint8_t *r, *g, *b, *a; };
     auto tables = (const Tables*)ctx;
 
-    r = from_byte(gather(tables->r, round(r, 255.0f)));
-    g = from_byte(gather(tables->g, round(g, 255.0f)));
-    b = from_byte(gather(tables->b, round(b, 255.0f)));
-    a = from_byte(gather(tables->a, round(a, 255.0f)));
+    r = from_byte(gather(tables->r, to_unorm(r, 255)));
+    g = from_byte(gather(tables->g, to_unorm(g, 255)));
+    b = from_byte(gather(tables->b, to_unorm(b, 255)));
+    a = from_byte(gather(tables->a, to_unorm(a, 255)));
 }
 
 STAGE(byte_tables_rgb, const void* ctx) { // TODO: rename Tables SkJumper_ByteTablesRGBCtx
     struct Tables { const uint8_t *r, *g, *b; int n; };
     auto tables = (const Tables*)ctx;
 
-    F scale = tables->n - 1;
-    r = from_byte(gather(tables->r, round(r, scale)));
-    g = from_byte(gather(tables->g, round(g, scale)));
-    b = from_byte(gather(tables->b, round(b, scale)));
+    int scale = tables->n - 1;
+    r = from_byte(gather(tables->r, to_unorm(r, scale)));
+    g = from_byte(gather(tables->g, to_unorm(g, scale)));
+    b = from_byte(gather(tables->b, to_unorm(b, scale)));
 }
 
 SI F table(F v, const SkJumper_TableCtx* ctx) {
-    return gather(ctx->table, round(v, ctx->size - 1));
+    return gather(ctx->table, to_unorm(v, ctx->size - 1));
 }
 STAGE(table_r, const SkJumper_TableCtx* ctx) { r = table(r, ctx); }
 STAGE(table_g, const SkJumper_TableCtx* ctx) { g = table(g, ctx); }
@@ -881,7 +896,7 @@ STAGE(gather_a8, const SkJumper_GatherCtx* ctx) {
 STAGE(store_a8, const SkJumper_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<uint8_t>(ctx, dx,dy);
 
-    U8 packed = pack(pack(round(a, 255.0f)));
+    U8 packed = pack(pack(to_unorm(a, 255)));
     store(ptr, packed, tail);
 }
@@ -925,9 +940,9 @@ STAGE(gather_565, const SkJumper_GatherCtx* ctx) {
 STAGE(store_565, const SkJumper_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<uint16_t>(ctx, dx,dy);
 
-    U16 px = pack( round(r, 31.0f) << 11
-                 | round(g, 63.0f) <<  5
-                 | round(b, 31.0f)      );
+    U16 px = pack( to_unorm(r, 31) << 11
+                 | to_unorm(g, 63) <<  5
+                 | to_unorm(b, 31)      );
     store(ptr, px, tail);
 }
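The per-channel scales encode the 565 bit layout: 5 bits each for red and blue
(max 31) and 6 for green (max 63). A scalar sketch of the same packing, clamp
written out inline:

    #include <algorithm>
    #include <cstdint>

    // Pack one RGB pixel into 16-bit 565, clamping out-of-gamut channels first.
    static uint16_t pack_565(float r, float g, float b) {
        auto unorm = [](float v, float scale) {
            return (uint32_t)(std::min(std::max(v, 0.0f), 1.0f) * scale + 0.5f);
        };
        return (uint16_t)( unorm(r, 31) << 11    // red:   bits 11..15
                         | unorm(g, 63) <<  5    // green: bits  5..10
                         | unorm(b, 31)       ); // blue:  bits  0..4
    }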
@@ -946,10 +961,10 @@ STAGE(gather_4444, const SkJumper_GatherCtx* ctx) {
 }
 STAGE(store_4444, const SkJumper_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<uint16_t>(ctx, dx,dy);
-    U16 px = pack( round(r, 15.0f) << 12
-                 | round(g, 15.0f) <<  8
-                 | round(b, 15.0f) <<  4
-                 | round(a, 15.0f)      );
+    U16 px = pack( to_unorm(r, 15) << 12
+                 | to_unorm(g, 15) <<  8
+                 | to_unorm(b, 15) <<  4
+                 | to_unorm(a, 15)      );
     store(ptr, px, tail);
 }
@@ -969,10 +984,10 @@ STAGE(gather_8888, const SkJumper_GatherCtx* ctx) {
 STAGE(store_8888, const SkJumper_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<uint32_t>(ctx, dx,dy);
 
-    U32 px = round(r, 255.0f)
-           | round(g, 255.0f) <<  8
-           | round(b, 255.0f) << 16
-           | round(a, 255.0f) << 24;
+    U32 px = to_unorm(r, 255)
+           | to_unorm(g, 255) <<  8
+           | to_unorm(b, 255) << 16
+           | to_unorm(a, 255) << 24;
     store(ptr, px, tail);
 }
@@ -992,10 +1007,10 @@ STAGE(gather_bgra, const SkJumper_GatherCtx* ctx) {
 STAGE(store_bgra, const SkJumper_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<uint32_t>(ctx, dx,dy);
 
-    U32 px = round(b, 255.0f)
-           | round(g, 255.0f) <<  8
-           | round(r, 255.0f) << 16
-           | round(a, 255.0f) << 24;
+    U32 px = to_unorm(b, 255)
+           | to_unorm(g, 255) <<  8
+           | to_unorm(r, 255) << 16
+           | to_unorm(a, 255) << 24;
     store(ptr, px, tail);
 }
@@ -1064,10 +1079,10 @@ STAGE(load_rgb_u16_be, const SkJumper_MemoryCtx* ctx) {
 STAGE(store_u16_be, const SkJumper_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<uint16_t>(ctx, 4*dx,dy);
 
-    U16 R = bswap(pack(round(r, 65535.0f))),
-        G = bswap(pack(round(g, 65535.0f))),
-        B = bswap(pack(round(b, 65535.0f))),
-        A = bswap(pack(round(a, 65535.0f)));
+    U16 R = bswap(pack(to_unorm(r, 65535))),
+        G = bswap(pack(to_unorm(g, 65535))),
+        B = bswap(pack(to_unorm(b, 65535))),
+        A = bswap(pack(to_unorm(a, 65535)));
 
     store4(ptr,tail, R,G,B,A);
 }