bicubic, attempt gazillion

- explicitly separate bilinear_ stages in x and y too

BUG=skia:

CQ_INCLUDE_TRYBOTS=skia.primary:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD

Change-Id: Ib7b4f9d26ea6abe9171068e92424479d811ee606
Reviewed-on: https://skia-review.googlesource.com/5636
Reviewed-by: Brian Osman <brianosman@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
This commit is contained in:
Mike Klein 2016-12-09 16:25:44 -05:00 committed by Skia Commit-Bot
parent 21aa35f0aa
commit b0b17d1e53
7 changed files with 155 additions and 38 deletions

View File

@ -40,11 +40,15 @@ SkBitmapController::State* SkBitmapController::requestBitmap(const SkBitmapProvi
class SkDefaultBitmapControllerState : public SkBitmapController::State {
public:
SkDefaultBitmapControllerState(const SkBitmapProvider&, const SkMatrix& inv, SkFilterQuality);
SkDefaultBitmapControllerState(const SkBitmapProvider&,
const SkMatrix& inv,
SkFilterQuality,
bool canShadeHQ);
private:
SkBitmap fResultBitmap;
sk_sp<const SkMipMap> fCurrMip;
bool fCanShadeHQ;
bool processExternalRequest(const SkBitmapProvider&);
bool processHQRequest(const SkBitmapProvider&);
@ -128,6 +132,14 @@ bool SkDefaultBitmapControllerState::processHQRequest(const SkBitmapProvider& pr
return false; // only use HQ when upsampling
}
// If the shader can natively handle HQ filtering, let it do it.
if (fCanShadeHQ) {
fQuality = kHigh_SkFilterQuality;
SkAssertResult(provider.asBitmap(&fResultBitmap));
fResultBitmap.lockPixels();
return true;
}
const int dstW = SkScalarRoundToScalar(provider.width() / invScaleX);
const int dstH = SkScalarRoundToScalar(provider.height() / invScaleY);
const SkBitmapCacheDesc desc = provider.makeCacheDesc(dstW, dstH);
@ -222,9 +234,11 @@ bool SkDefaultBitmapControllerState::processMediumRequest(const SkBitmapProvider
SkDefaultBitmapControllerState::SkDefaultBitmapControllerState(const SkBitmapProvider& provider,
const SkMatrix& inv,
SkFilterQuality qual) {
SkFilterQuality qual,
bool canShadeHQ) {
fInvMatrix = inv;
fQuality = qual;
fCanShadeHQ = canShadeHQ;
bool processed = this->processExternalRequest(provider);
@ -239,7 +253,7 @@ SkDefaultBitmapControllerState::SkDefaultBitmapControllerState(const SkBitmapPro
fResultBitmap.lockPixels();
// lock may fail to give us pixels
}
SkASSERT(fQuality <= kLow_SkFilterQuality);
SkASSERT(fCanShadeHQ || fQuality <= kLow_SkFilterQuality);
// fResultBitmap.getPixels() may be null, but our caller knows to check fPixmap.addr()
// and will destroy us if it is nullptr.
@ -251,5 +265,6 @@ SkBitmapController::State* SkDefaultBitmapController::onRequestBitmap(const SkBi
const SkMatrix& inverse,
SkFilterQuality quality,
void* storage, size_t size) {
return SkInPlaceNewCheck<SkDefaultBitmapControllerState>(storage, size, bm, inverse, quality);
return SkInPlaceNewCheck<SkDefaultBitmapControllerState>(storage, size,
bm, inverse, quality, fCanShadeHQ);
}

View File

@ -57,11 +57,14 @@ protected:
// Default bitmap controller.  It is told at construction time whether high-quality
// filtering may be left to the caller (CanShadeHQ::kYes) or not (CanShadeHQ::kNo).
class SkDefaultBitmapController : public SkBitmapController {
public:
// NOTE(review): this constructor leaves fCanShadeHQ uninitialized.  It looks like the
// pre-change line of this diff hunk -- confirm it is gone from the final file.
SkDefaultBitmapController() {}
enum class CanShadeHQ { kNo, kYes };
// Stores the caller's choice as a plain bool.
SkDefaultBitmapController(CanShadeHQ canShadeHQ)
: fCanShadeHQ(canShadeHQ == CanShadeHQ::kYes) {}
protected:
State* onRequestBitmap(const SkBitmapProvider&, const SkMatrix& inverse, SkFilterQuality,
void* storage, size_t storageSize) override;
// True when constructed with CanShadeHQ::kYes.
bool fCanShadeHQ;
};
#endif

View File

@ -109,7 +109,7 @@ bool SkBitmapProcInfo::init(const SkMatrix& inv, const SkPaint& paint) {
allow_ignore_fractional_translate = false;
}
SkDefaultBitmapController controller;
SkDefaultBitmapController controller(SkDefaultBitmapController::CanShadeHQ::kNo);
fBMState = controller.requestBitmap(fProvider, inv, paint.getFilterQuality(),
fBMStateStorage.get(), fBMStateStorage.size());
// Note : we allow the controller to return an empty (zero-dimension) result. Should we?

View File

@ -86,7 +86,9 @@
M(clamp_y) M(mirror_y) M(repeat_y) \
M(gather_a8) M(gather_g8) M(gather_i8) \
M(gather_565) M(gather_4444) M(gather_8888) M(gather_f16) \
M(bilinear_nn) M(bilinear_pn) M(bilinear_np) M(bilinear_pp) \
M(bilinear_nx) M(bilinear_px) M(bilinear_ny) M(bilinear_py) \
M(bicubic_n3x) M(bicubic_n1x) M(bicubic_p1x) M(bicubic_p3x) \
M(bicubic_n3y) M(bicubic_n1y) M(bicubic_p1y) M(bicubic_p3y) \
M(save_xy) M(accumulate)
class SkRasterPipeline {

View File

@ -279,7 +279,7 @@ bool SkImageShader::onAppendStages(SkRasterPipeline* p, SkColorSpace* dst, SkFal
auto quality = paint.getFilterQuality();
SkBitmapProvider provider(fImage.get(), dst);
SkDefaultBitmapController controller;
SkDefaultBitmapController controller(SkDefaultBitmapController::CanShadeHQ::kYes);
std::unique_ptr<SkBitmapController::State> state {
controller.requestBitmap(provider, matrix, quality)
};
@ -293,7 +293,8 @@ bool SkImageShader::onAppendStages(SkRasterPipeline* p, SkColorSpace* dst, SkFal
auto info = pm.info();
// When the matrix is just an integer translate, bilerp == nearest neighbor.
if (matrix.getType() <= SkMatrix::kTranslate_Mask &&
if (quality == kLow_SkFilterQuality &&
matrix.getType() <= SkMatrix::kTranslate_Mask &&
matrix.getTranslateX() == (int)matrix.getTranslateX() &&
matrix.getTranslateY() == (int)matrix.getTranslateY()) {
quality = kNone_SkFilterQuality;
@ -353,20 +354,48 @@ bool SkImageShader::onAppendStages(SkRasterPipeline* p, SkColorSpace* dst, SkFal
}
};
auto sample = [&](SkRasterPipeline::StockStage sampler) {
p->append(sampler, ctx);
auto sample = [&](SkRasterPipeline::StockStage setup_x,
SkRasterPipeline::StockStage setup_y) {
p->append(setup_x, ctx);
p->append(setup_y, ctx);
append_tiling_and_gather();
p->append(SkRasterPipeline::accumulate, ctx);
};
if (quality == kNone_SkFilterQuality) {
append_tiling_and_gather();
} else if (quality == kLow_SkFilterQuality) {
p->append(SkRasterPipeline::save_xy, ctx);
sample(SkRasterPipeline::bilinear_nx, SkRasterPipeline::bilinear_ny);
sample(SkRasterPipeline::bilinear_px, SkRasterPipeline::bilinear_ny);
sample(SkRasterPipeline::bilinear_nx, SkRasterPipeline::bilinear_py);
sample(SkRasterPipeline::bilinear_px, SkRasterPipeline::bilinear_py);
p->append(SkRasterPipeline::move_dst_src);
} else {
p->append(SkRasterPipeline::save_xy, ctx);
sample(SkRasterPipeline::bilinear_nn);
sample(SkRasterPipeline::bilinear_np);
sample(SkRasterPipeline::bilinear_pn);
sample(SkRasterPipeline::bilinear_pp);
sample(SkRasterPipeline::bicubic_n3x, SkRasterPipeline::bicubic_n3y);
sample(SkRasterPipeline::bicubic_n1x, SkRasterPipeline::bicubic_n3y);
sample(SkRasterPipeline::bicubic_p1x, SkRasterPipeline::bicubic_n3y);
sample(SkRasterPipeline::bicubic_p3x, SkRasterPipeline::bicubic_n3y);
sample(SkRasterPipeline::bicubic_n3x, SkRasterPipeline::bicubic_n1y);
sample(SkRasterPipeline::bicubic_n1x, SkRasterPipeline::bicubic_n1y);
sample(SkRasterPipeline::bicubic_p1x, SkRasterPipeline::bicubic_n1y);
sample(SkRasterPipeline::bicubic_p3x, SkRasterPipeline::bicubic_n1y);
sample(SkRasterPipeline::bicubic_n3x, SkRasterPipeline::bicubic_p1y);
sample(SkRasterPipeline::bicubic_n1x, SkRasterPipeline::bicubic_p1y);
sample(SkRasterPipeline::bicubic_p1x, SkRasterPipeline::bicubic_p1y);
sample(SkRasterPipeline::bicubic_p3x, SkRasterPipeline::bicubic_p1y);
sample(SkRasterPipeline::bicubic_n3x, SkRasterPipeline::bicubic_p3y);
sample(SkRasterPipeline::bicubic_n1x, SkRasterPipeline::bicubic_p3y);
sample(SkRasterPipeline::bicubic_p1x, SkRasterPipeline::bicubic_p3y);
sample(SkRasterPipeline::bicubic_p3x, SkRasterPipeline::bicubic_p3y);
p->append(SkRasterPipeline::move_dst_src);
}
@ -383,5 +412,10 @@ bool SkImageShader::onAppendStages(SkRasterPipeline* p, SkColorSpace* dst, SkFal
if (info.colorType() == kAlpha_8_SkColorType || info.alphaType() == kUnpremul_SkAlphaType) {
p->append(SkRasterPipeline::premul);
}
if (quality > kLow_SkFilterQuality) {
// Bicubic filtering naturally produces out of range values on both sides.
p->append(SkRasterPipeline::clamp_0);
p->append(SkRasterPipeline::clamp_a);
}
return append_gamut_transform(p, scratch, info.colorSpace(), dst);
}

View File

@ -30,7 +30,8 @@ struct SkImageShaderContext {
float y[8];
float fx[8];
float fy[8];
float scale[8];
float scalex[8];
float scaley[8];
};
#endif//SkImageShaderContext_DEFINED

View File

@ -809,42 +809,104 @@ STAGE(save_xy) {
r.store(sc->x);
g.store(sc->y);
// Whether bilinear or bicubic, all sample points have the same fractional offset (fx,fy).
// They're either the 4 corners of a logical 1x1 pixel or the 16 corners of a 3x3 grid
// surrounding (x,y), all (0.5,0.5) off-center.
auto fract = [](const SkNf& v) { return v - v.floor(); };
fract(r + 0.5f).store(sc->fx);
fract(g + 0.5f).store(sc->fy);
}
// Combined x/y bilinear setup for one of the four corner samples.
//
// X and Y are each -1 or +1 and pick the corner.  The corner's coordinates are written
// to *x and *y, and its weight (the product of the per-axis overlaps) to sc->scale.
template <int X, int Y>
SI void bilinear(void* ctx, SkNf* x, SkNf* y) {
    auto* shader = static_cast<SkImageShaderContext*>(ctx);

    // Step half a pixel away from the saved sample point along each axis.
    *x = SkNf::Load(shader->x) + X*0.5f;
    *y = SkNf::Load(shader->y) + Y*0.5f;

    // The weight along each axis is the fractional offset on the positive side and its
    // complement on the negative side.
    auto fracX = SkNf::Load(shader->fx);
    auto fracY = SkNf::Load(shader->fy);
    SkNf weightX = X > 0 ? fracX : (1.0f - fracX);
    SkNf weightY = Y > 0 ? fracY : (1.0f - fracY);

    (weightX * weightY).store(shader->scale);
}
// One stage per corner; the suffix gives the sign of the x offset, then the y offset
// (n = negative, p = positive).  r is passed as the x coordinate and g as the y coordinate.
STAGE(bilinear_nn) { bilinear<-1,-1>(ctx, &r, &g); }
STAGE(bilinear_pn) { bilinear<+1,-1>(ctx, &r, &g); }
STAGE(bilinear_np) { bilinear<-1,+1>(ctx, &r, &g); }
STAGE(bilinear_pp) { bilinear<+1,+1>(ctx, &r, &g); }
STAGE(accumulate) {
auto sc = (const SkImageShaderContext*)ctx;
auto scale = SkNf::Load(sc->scale);
// Bilinear and bicubic filtering are both separable, so we'll end up with independent
// scale contributions in x and y that we multiply together to get each pixel's scale factor.
auto scale = SkNf::Load(sc->scalex) * SkNf::Load(sc->scaley);
dr = SkNf_fma(scale, r, dr);
dg = SkNf_fma(scale, g, dg);
db = SkNf_fma(scale, b, db);
da = SkNf_fma(scale, a, da);
}
// In bilinear interpolation, the 4 pixels at +/- 0.5 offsets from the sample pixel center
// are combined in direct proportion to their area overlapping that logical query pixel.
// At positive offsets, the x-axis contribution to that rectangular area is fx; (1-fx)
// at negative x offsets. The y-axis is treated symmetrically.
// x-axis half of bilinear setup: writes the offset x coordinate to *x and the matching
// x-axis weight to sc->scalex.  Scale is -1 or +1, in units of half a pixel.
template <int Scale>
SI void bilinear_x(void* ctx, SkNf* x) {
    auto* shader = static_cast<SkImageShaderContext*>(ctx);

    *x = SkNf::Load(shader->x) + Scale*0.5f;

    // fx on the positive side, its complement on the negative side.
    auto frac = SkNf::Load(shader->fx);
    SkNf weight = Scale > 0 ? frac : (1.0f - frac);
    weight.store(shader->scalex);
}
// y-axis half of bilinear setup: writes the offset y coordinate to *y and the matching
// y-axis weight to sc->scaley.  Scale is -1 or +1, in units of half a pixel.
template <int Scale>
SI void bilinear_y(void* ctx, SkNf* y) {
    auto* shader = static_cast<SkImageShaderContext*>(ctx);

    *y = SkNf::Load(shader->y) + Scale*0.5f;

    // fy on the positive side, its complement on the negative side.
    auto frac = SkNf::Load(shader->fy);
    SkNf weight = Scale > 0 ? frac : (1.0f - frac);
    weight.store(shader->scaley);
}
// Per-axis bilinear stages (n = negative offset, p = positive offset).
// The x stages operate on r and the y stages on g.
STAGE(bilinear_nx) { bilinear_x<-1>(ctx, &r); }
STAGE(bilinear_px) { bilinear_x<+1>(ctx, &r); }
STAGE(bilinear_ny) { bilinear_y<-1>(ctx, &g); }
STAGE(bilinear_py) { bilinear_y<+1>(ctx, &g); }
// In bicubic interpolation, the 16 pixels at +/- 0.5 and +/- 1.5 offsets from the sample
// pixel center are combined with a non-uniform cubic filter, with high filter values near
// the center and lower values farther away.
//
// We break this filter function into two parts, one for near +/- 0.5 offsets,
// and one for far +/- 1.5 offsets.
//
// See GrBicubicEffect for details about this particular Mitchell-Netravali filter.
// Filter weight for the near taps (+/- 0.5 offsets), as a cubic in t:
//     1/18 + 9/18 t + 27/18 t^2 - 21/18 t^3
// evaluated in Horner form, innermost term first.
SI SkNf bicubic_near(const SkNf& t) {
    SkNf cubicAndSquare = SkNf_fma(-21/18.0f, t, 27/18.0f);   // -21/18 t + 27/18
    SkNf withLinear     = SkNf_fma(t, cubicAndSquare, 9/18.0f);
    return SkNf_fma(t, withLinear, 1/18.0f);
}
// Filter weight for the far taps (+/- 1.5 offsets), as a cubic in t:
//     -6/18 t^2 + 7/18 t^3  ==  t^2 (7/18 t - 6/18)
// The constant and linear terms are zero.
SI SkNf bicubic_far(const SkNf& t) {
    SkNf tSquared = t*t;
    SkNf linear   = SkNf_fma(7/18.0f, t, -6/18.0f);   // 7/18 t - 6/18
    return tSquared * linear;
}
// x-axis half of bicubic setup for one column of the 16-sample grid.
//
// Scale is the column's offset from the saved x in half pixels: -3 and +3 are the far
// columns, -1 and +1 the near ones.  Writes the offset x coordinate to *x and the column's
// filter weight to sc->scalex, which the accumulate stage multiplies with sc->scaley.
template <int Scale>
SI void bicubic_x(void* ctx, SkNf* x) {
    // Reject unsupported offsets at compile time.  A debug-only runtime assert would let a
    // release build fall through without ever writing sc->scalex.
    static_assert(Scale == -3 || Scale == -1 || Scale == +1 || Scale == +3,
                  "bicubic_x supports only offsets of -3, -1, +1 and +3 half pixels");

    auto sc = (SkImageShaderContext*)ctx;
    *x = SkNf::Load(sc->x) + Scale*0.5f;

    auto fx = SkNf::Load(sc->fx);
    // Negative offsets evaluate the filter at (1 - fx), positive offsets at fx.
    SkNf t = Scale > 0 ? fx : (1.0f - fx);
    // Both conditions are compile-time constants, so only one filter half is evaluated.
    SkNf weight = (Scale == -3 || Scale == +3) ? bicubic_far(t) : bicubic_near(t);
    weight.store(sc->scalex);
}
// y-axis half of bicubic setup for one row of the 16-sample grid.
//
// Scale is the row's offset from the saved y in half pixels: -3 and +3 are the far rows,
// -1 and +1 the near ones.  Writes the offset y coordinate to *y and the row's filter
// weight to sc->scaley, which the accumulate stage multiplies with sc->scalex.
template <int Scale>
SI void bicubic_y(void* ctx, SkNf* y) {
    // Reject unsupported offsets at compile time.  A debug-only runtime assert would let a
    // release build fall through without ever writing sc->scaley.
    static_assert(Scale == -3 || Scale == -1 || Scale == +1 || Scale == +3,
                  "bicubic_y supports only offsets of -3, -1, +1 and +3 half pixels");

    auto sc = (SkImageShaderContext*)ctx;
    *y = SkNf::Load(sc->y) + Scale*0.5f;

    auto fy = SkNf::Load(sc->fy);
    // Negative offsets evaluate the filter at (1 - fy), positive offsets at fy.
    SkNf t = Scale > 0 ? fy : (1.0f - fy);
    // Both conditions are compile-time constants, so only one filter half is evaluated.
    SkNf weight = (Scale == -3 || Scale == +3) ? bicubic_far(t) : bicubic_near(t);
    weight.store(sc->scaley);
}
// Per-axis bicubic stages.  The name encodes the sign (n = negative, p = positive) and
// size (1 or 3 half pixels) of the offset, then the axis.
// The x stages operate on r and the y stages on g.
STAGE(bicubic_n3x) { bicubic_x<-3>(ctx, &r); }
STAGE(bicubic_n1x) { bicubic_x<-1>(ctx, &r); }
STAGE(bicubic_p1x) { bicubic_x<+1>(ctx, &r); }
STAGE(bicubic_p3x) { bicubic_x<+3>(ctx, &r); }
STAGE(bicubic_n3y) { bicubic_y<-3>(ctx, &g); }
STAGE(bicubic_n1y) { bicubic_y<-1>(ctx, &g); }
STAGE(bicubic_p1y) { bicubic_y<+1>(ctx, &g); }
STAGE(bicubic_p3y) { bicubic_y<+3>(ctx, &g); }
template <typename T>
SI SkNi offset_and_ptr(T** ptr, const void* ctx, const SkNf& x, const SkNf& y) {
auto sc = (const SkImageShaderContext*)ctx;