low-hanging bilerp_clamp_8888 wins

- support sampling bgra too
   - lowp impl

Bug: skia:7810

Change-Id: I21db805483f612024802f2b508c140c42a029c54
Reviewed-on: https://skia-review.googlesource.com/121582
Commit-Queue: Mike Klein <mtklein@google.com>
Reviewed-by: Florin Malita <fmalita@chromium.org>
This commit is contained in:
Mike Klein 2018-04-16 12:56:24 -04:00 committed by Skia Commit-Bot
parent 1d4af54a7b
commit f946b41643
2 changed files with 67 additions and 8 deletions

View File

@ -2264,7 +2264,7 @@ STAGE(gauss_a_to_rgba, Ctx::None) {
}
// A specialized fused image shader for clamp-x, clamp-y, non-sRGB sampling.
STAGE(bilerp_clamp_8888, SkJumper_GatherCtx* ctx) {
STAGE(bilerp_clamp_8888, const SkJumper_GatherCtx* ctx) {
// (cx,cy) are the center of our sample.
F cx = r,
cy = g;
@ -2600,6 +2600,7 @@ SI F floor_(F x) {
return roundtrip - if_then_else(roundtrip > x, F(1), F(0));
#endif
}
// Fractional part of x, measured upward from floor_(x).
// (Assuming floor_() is a true floor, the result lands in [0,1) even for negative x.)
SI F fract(F x) {
    F whole = floor_(x);
    return x - whole;
}
// Absolute value by bit manipulation: reinterpret x as I32, clear bit 31 with the
// 0x7fffffff mask, and reinterpret the result as F again.
// (Bit 31 is the sign bit, presuming F holds 32-bit IEEE floats.)
SI F abs_(F x) {
    I32 bits = bit_cast<I32>(x);
    return bit_cast<F>(bits & 0x7fffffff);
}
// ~~~~~~ Basic / misc. stages ~~~~~~ //
@ -3320,6 +3321,59 @@ STAGE_PP(srcover_bgra_8888, const SkJumper_MemoryCtx* ctx) {
store_8888_(ptr, tail, b,g,r,a);
}
#if defined(SK_DISABLE_LOWP_BILERP_CLAMP_CLAMP_STAGE)
    // The lowp stage is compiled out: leave a null stage pointer under the same name.
    static void(*bilerp_clamp_8888)(void) = nullptr;
#else
    // Lowp bilinear sampler: gathers the four texels around (x,y) through ctx and
    // writes their area-weighted average into r,g,b,a.
    STAGE_GP(bilerp_clamp_8888, const SkJumper_GatherCtx* ctx) {
        // The incoming (x,y) is the center of the logical 1x1 pixel we're sampling.
        F center_x = x,
          center_y = y;

        // The four sample points sit at (+/-0.5, +/-0.5) from that center, so they all
        // share one fractional position within their respective texels.
        F frac_x = fract(center_x + 0.5f),
          frac_y = fract(center_y + 0.5f);

        // The output channels double as accumulators for the weighted samples.
        r = 0;
        g = 0;
        b = 0;
        a = 0;

        for (float off_y = -0.5f; off_y <= +0.5f; off_y += 1.0f) {
            // Each sample's weight is the area it overlaps with the query pixel.
            // Along y that extent is frac_y on the positive side, (1-frac_y) on the
            // negative side; it's the same for both samples in this row.
            F weight_y = (off_y > 0) ? frac_y : 1.0f - frac_y;

            for (float off_x = -0.5f; off_x <= +0.5f; off_x += 1.0f) {
                // Same idea along x.
                F weight_x = (off_x > 0) ? frac_x : 1.0f - frac_x;

                // Where this particular sample lands.
                F sample_x = center_x + off_x,
                  sample_y = center_y + off_y;

                // ix_and_ptr() handles clamping to the image's bounds.
                const uint32_t* ptr;
                U32 ix = ix_and_ptr(&ptr, ctx, sample_x, sample_y);

                // Fetch the 8888 pixel and split it into 16-bit channels.
                U16 sr, sg, sb, sa;
                from_8888(gather<U32>(ptr, ix), &sr, &sg, &sb, &sa);

                // The four weight_x*weight_y products sum to 1.0, so scaling by 256
                // keeps up to 8 fractional bits without overflowing 16-bit accumulators.
                U16 area = cast<U16>(weight_x * weight_y * 256);

                r += sr * area;
                g += sg * area;
                b += sb * area;
                a += sa * area;
            }
        }

        // Drop the 8 fractional bits carried by the weights.
        r /= 256;
        g /= 256;
        b /= 256;
        a /= 256;
    }
#endif
// Now we'll add null stand-ins for stages we haven't implemented in lowp.
// If a pipeline uses these stages, it'll boot it out of lowp into highp.
@ -3360,8 +3414,7 @@ static NotImplemented
alter_2pt_conical_unswap,
mask_2pt_conical_nan,
mask_2pt_conical_degenerates,
apply_vector_mask,
bilerp_clamp_8888;
apply_vector_mask;
#endif//defined(JUMPER_IS_SCALAR) controlling whether we build lowp stages
} // namespace lowp

View File

@ -416,13 +416,19 @@ bool SkImageShader::onAppendStages(const StageRec& rec) const {
return true;
};
if (quality == kLow_SkFilterQuality &&
info.colorType() == kRGBA_8888_SkColorType &&
fTileModeX == SkShader::kClamp_TileMode &&
fTileModeY == SkShader::kClamp_TileMode &&
!is_srgb) {
// We've got a fast path for 8888 bilinear clamp/clamp non-color-managed sampling.
auto ct = info.colorType();
if (true
&& (ct == kRGBA_8888_SkColorType || ct == kBGRA_8888_SkColorType)
&& quality == kLow_SkFilterQuality
&& fTileModeX == SkShader::kClamp_TileMode
&& fTileModeY == SkShader::kClamp_TileMode
&& !is_srgb) {
p->append(SkRasterPipeline::bilerp_clamp_8888, gather);
if (ct == kBGRA_8888_SkColorType) {
p->append(SkRasterPipeline::swap_rb);
}
return append_misc();
}