Revert "attempt 2: add experimental bilerp_clamp_8888 stage"

This reverts commit 8a64e52a98.

Reason for revert:

Test-Android-Clang-NexusPlayer-CPU-Moorefield-x86-Release-All-Android
Test-Android-Clang-NexusPlayer-GPU-PowerVR-x86-Release-All-Android

Original change's description:
> attempt 2: add experimental bilerp_clamp_8888 stage
> 
> It looks like we can specialize hot image shaders into their
> own single stages for a good speedup on both x86 and ARM.
> 
> I've started here with bilerp_clamp_8888, and will
> follow up with bgra and 565, and lowp versions of those,
> and probably also the same for nearest neighbors.
> 
> All pixels are identical in GMs.
> 
> Change-Id: Ib5ed6e528efd9e3eed96ba67d02fbec2e8133a81
> Reviewed-on: https://skia-review.googlesource.com/86860
> Reviewed-by: Mike Klein <mtklein@chromium.org>
> Commit-Queue: Mike Klein <mtklein@chromium.org>

TBR=mtklein@chromium.org,liyuqian@google.com

Change-Id: I34409a7b4aee4fd54baee44f7fc53bd0982500fe
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://skia-review.googlesource.com/86601
Reviewed-by: Mike Klein <mtklein@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
This commit is contained in:
Mike Klein 2017-12-18 23:16:35 +00:00 committed by Skia Commit-Bot
parent 8a64e52a98
commit 245c4b21c1
7 changed files with 9242 additions and 11151 deletions

View File

@ -53,7 +53,6 @@ struct SkJumper_Engine;
M(load_f32) M(load_f32_dst) M(store_f32) \
M(load_8888) M(load_8888_dst) M(store_8888) M(gather_8888) \
M(load_bgra) M(load_bgra_dst) M(store_bgra) M(gather_bgra) \
M(bilerp_clamp_8888) \
M(load_u16_be) M(load_rgb_u16_be) M(store_u16_be) \
M(load_tables_u16_be) M(load_tables_rgb_u16_be) M(load_tables) \
M(load_rgba) M(store_rgba) \

View File

@ -219,7 +219,6 @@ extern "C" {
NOPE(load_f32) NOPE(load_f32_dst) NOPE(store_f32)
LOWP(load_8888) LOWP(load_8888_dst) LOWP(store_8888) LOWP(gather_8888)
LOWP(load_bgra) LOWP(load_bgra_dst) LOWP(store_bgra) LOWP(gather_bgra)
TODO(bilerp_clamp_8888)
TODO(load_u16_be) TODO(load_rgb_u16_be) TODO(store_u16_be)
NOPE(load_tables_u16_be) NOPE(load_tables_rgb_u16_be) NOPE(load_tables)
NOPE(load_rgba) NOPE(store_rgba)

View File

@ -48,10 +48,10 @@ struct SkJumper_MemoryCtx {
};
// Describes the image that gather-style stages sample from.  SkImageShader::onAppendStages
// fills it in from a pixmap, and ix_and_ptr() clamps x against width and y against height.
// NOTE(review): this hunk lists the members twice (const void* pixels vs. void* pixels) —
// presumably the two sides of the diff shown without +/- markers; confirm against the header.
struct SkJumper_GatherCtx {
const void* pixels;
int stride;
float width;
float height;
void* pixels;
int stride;   // Set from the pixmap's rowBytesAsPixels().
float width,  // Exclusive upper limits used when clamping sample coordinates.
height;
};
// State shared by save_xy, accumulate, and bilinear_* / bicubic_*.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -194,15 +194,13 @@ SI T* ptr_at_xy(const SkJumper_MemoryCtx* ctx, int dx, int dy) {
return (T*)ctx->pixels + dy*ctx->stride + dx;
}
// Clamps v into the half-open range [0,limit).
SI F clamp(F v, F limit) {
    // Step limit's bit pattern down by one to turn the exclusive bound into an inclusive one.
    const F upper = bit_cast<F>( bit_cast<U32>(limit) - 1 );
    const F floored = max(0, v);
    return min(floored, upper);
}
// Used by gather_ stages to calculate the base pointer and a vector of indices to load.
template <typename T>
SI U32 ix_and_ptr(T** ptr, const SkJumper_GatherCtx* ctx, F x, F y) {
auto clamp = [](F v, F limit) {
limit = bit_cast<F>( bit_cast<U32>(limit) - 1 ); // Exclusive -> inclusive.
return min(max(0, v), limit);
};
x = clamp(x, ctx->width);
y = clamp(y, ctx->height);
@ -1523,47 +1521,3 @@ STAGE(gauss_a_to_rgba, Ctx::None) {
g = a;
b = a;
}
// A specialized fused image shader for clamp-x, clamp-y, non-sRGB sampling.
// On entry r and g carry the x and y of the sample center; on exit r,g,b,a hold the
// area-weighted sum of the four surrounding samples decoded with from_8888().
STAGE(bilerp_clamp_8888, SkJumper_GatherCtx* ctx) {
// (cx,cy) are the center of our sample.
F cx = r,
cy = g;
// All sample points are at the same fractional offset (fx,fy).
// They're the 4 corners of a logical 1x1 pixel surrounding (x,y) at (0.5,0.5) offsets.
F fx = fract(cx + 0.5f),
fy = fract(cy + 0.5f);
// We'll accumulate the color of all four samples into {r,g,b,a} directly.
r = g = b = a = 0;
// The nested loops below visit all four (dx,dy) combinations of -0.5 and +0.5,
// with dy as the outer loop and dx as the inner loop.
float offsets[] = {-0.5f,+0.5f};
for (float dy : offsets)
for (float dx : offsets) {
// (x,y) are the coordinates of this sample point.
F x = cx + dx,
y = cy + dy;
// ix_and_ptr() will clamp to the image's bounds for us.
const uint32_t* ptr;
U32 ix = ix_and_ptr(&ptr, ctx, x,y);
// Gather this sample's pixels and unpack them into per-channel values.
F sr,sg,sb,sa;
from_8888(gather(ptr, ix), &sr,&sg,&sb,&sa);
// In bilinear interpolation, the 4 pixels at +/- 0.5 offsets from the sample pixel center
// are combined in direct proportion to their area overlapping that logical query pixel.
// At positive offsets, the x-axis contribution to that rectangle is fx,
// or (1-fx) at negative x. Same deal for y.
F sx = (dx > 0) ? fx : 1.0f - fx,
sy = (dy > 0) ? fy : 1.0f - fy,
area = sx * sy;
// Add this sample's weighted contribution to the running totals.
r += sr * area;
g += sg * area;
b += sb * area;
a += sa * area;
}
}

View File

@ -313,7 +313,7 @@ bool SkImageShader::onAppendStages(const StageRec& rec) const {
p->append_matrix(alloc, matrix);
auto gather = alloc->make<SkJumper_GatherCtx>();
gather->pixels = pm.addr();
gather->pixels = pm.writable_addr(); // Don't worry, we won't write to it.
gather->stride = pm.rowBytesAsPixels();
gather->width = pm.width();
gather->height = pm.height();
@ -325,8 +325,6 @@ bool SkImageShader::onAppendStages(const StageRec& rec) const {
limit_y->scale = pm.height();
limit_y->invScale = 1.0f / pm.height();
bool is_srgb = rec.fDstCS && (!info.colorSpace() || info.gammaCloseToSRGB());
auto append_tiling_and_gather = [&] {
switch (fTileModeX) {
case kClamp_TileMode: /* The gather_xxx stage will clamp for us. */ break;
@ -348,38 +346,11 @@ bool SkImageShader::onAppendStages(const StageRec& rec) const {
case kRGBA_F16_SkColorType: p->append(SkRasterPipeline::gather_f16, gather); break;
default: SkASSERT(false);
}
if (is_srgb) {
if (rec.fDstCS && (!info.colorSpace() || info.gammaCloseToSRGB())) {
p->append(SkRasterPipeline::from_srgb);
}
};
auto append_misc = [&] {
if (info.colorType() == kAlpha_8_SkColorType) {
p->append(SkRasterPipeline::set_rgb, &misc->paint_color);
}
if (info.colorType() == kAlpha_8_SkColorType ||
info.alphaType() == kUnpremul_SkAlphaType) {
p->append(SkRasterPipeline::premul);
}
if (quality > kLow_SkFilterQuality) {
// Bicubic filtering naturally produces out of range values on both sides.
p->append(SkRasterPipeline::clamp_0);
p->append(SkRasterPipeline::clamp_a);
}
append_gamut_transform(p, alloc, info.colorSpace(), rec.fDstCS, kPremul_SkAlphaType);
return true;
};
if (quality == kLow_SkFilterQuality &&
info.colorType() == kRGBA_8888_SkColorType &&
fTileModeX == SkShader::kClamp_TileMode &&
fTileModeY == SkShader::kClamp_TileMode &&
!is_srgb) {
p->append(SkRasterPipeline::bilerp_clamp_8888, gather);
return append_misc();
}
SkJumper_SamplerCtx* sampler = nullptr;
if (quality != kNone_SkFilterQuality) {
sampler = alloc->make<SkJumper_SamplerCtx>();
@ -395,7 +366,6 @@ bool SkImageShader::onAppendStages(const StageRec& rec) const {
if (quality == kNone_SkFilterQuality) {
append_tiling_and_gather();
} else if (quality == kLow_SkFilterQuality) {
p->append(SkRasterPipeline::save_xy, sampler);
@ -405,7 +375,6 @@ bool SkImageShader::onAppendStages(const StageRec& rec) const {
sample(SkRasterPipeline::bilinear_px, SkRasterPipeline::bilinear_py);
p->append(SkRasterPipeline::move_dst_src);
} else {
p->append(SkRasterPipeline::save_xy, sampler);
@ -432,5 +401,17 @@ bool SkImageShader::onAppendStages(const StageRec& rec) const {
p->append(SkRasterPipeline::move_dst_src);
}
return append_misc();
if (info.colorType() == kAlpha_8_SkColorType) {
p->append(SkRasterPipeline::set_rgb, &misc->paint_color);
}
if (info.colorType() == kAlpha_8_SkColorType || info.alphaType() == kUnpremul_SkAlphaType) {
p->append(SkRasterPipeline::premul);
}
if (quality > kLow_SkFilterQuality) {
// Bicubic filtering naturally produces out of range values on both sides.
p->append(SkRasterPipeline::clamp_0);
p->append(SkRasterPipeline::clamp_a);
}
append_gamut_transform(p, alloc, info.colorSpace(), rec.fDstCS, kPremul_SkAlphaType);
return true;
}