plumb y through to SkJumper
There'll still be a little more refactoring after this, but this is the main thing we want to do. This makes y available in a general-purpose register in pipeline stages, just like x. Stages that need y (seed_shader and dither) can just use it rather than pulling it off a context pointer. seed_shader loses its context pointer, and dither's context gets simpler.

Change-Id: Ic2d1e13b03fb45b73e308b38aafbb3a14c29cf7f
Reviewed-on: https://skia-review.googlesource.com/18383
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
commit 9b10f8ff0d
parent 97b10ac484
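In sketch form, the heart of the change is the stage calling convention; both signatures below appear verbatim in the SkJumper_stages.cpp hunk further down:

    // Before: stages received x but not y, so a stage like dither had to chase
    // a pointer in its context struct to learn the current row.
    using Stage = void(size_t x, void** program, K* k, size_t tail, F,F,F,F, F,F,F,F);

    // After: y travels through every stage alongside x, in a register.
    using Stage = void(K* k, void** program, size_t x, size_t y, size_t tail, F,F,F,F, F,F,F,F);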
@@ -359,17 +359,16 @@ static void convert_with_pipeline(const SkImageInfo& dstInfo, void* dstRow, size
     SkASSERT(premulState == dat || kOpaque_SkAlphaType == srcInfo.alphaType());
 
     // We'll dither if we're decreasing precision below 32-bit.
-    int y;
-    SkJumper_DitherCtx dither = {&y, 0.0f};
+    float dither_rate = 0.0f;
     if (srcInfo.bytesPerPixel() > dstInfo.bytesPerPixel()) {
         switch (dstInfo.colorType()) {
-            case   kRGB_565_SkColorType: dither.rate = 1/63.0f; break;
-            case kARGB_4444_SkColorType: dither.rate = 1/15.0f; break;
-            default:                     dither.rate =    0.0f; break;
+            case   kRGB_565_SkColorType: dither_rate = 1/63.0f; break;
+            case kARGB_4444_SkColorType: dither_rate = 1/15.0f; break;
+            default:                     dither_rate =    0.0f; break;
         }
     }
-    if (dither.rate > 0) {
-        pipeline.append(SkRasterPipeline::dither, &dither);
+    if (dither_rate > 0) {
+        pipeline.append(SkRasterPipeline::dither, &dither_rate);
     }
 
     switch (dstInfo.colorType()) {
@@ -395,8 +394,7 @@ static void convert_with_pipeline(const SkImageInfo& dstInfo, void* dstRow, size
     }
 
     auto run = pipeline.compile();
-    // This y is declared above when handling dither (which needs to know y).
-    for (y = 0; y < srcInfo.height(); ++y) {
+    for (int y = 0; y < srcInfo.height(); ++y) {
         run(0,y, srcInfo.width());
         // The pipeline has pointers to srcRow and dstRow, so we just need to update them in the
         // loop to move between rows of src/dst.
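With the dither context reduced to a bare float, the conversion loop no longer needs to keep a y variable alive for the context to point at. A minimal usage sketch, assuming hypothetical width/height values (not from the diff):

    float dither_rate = 1/63.0f;                              // e.g. for a 565 destination
    pipeline.append(SkRasterPipeline::dither, &dither_rate);  // context is just the rate
    auto run = pipeline.compile();                            // callable as run(x, y, n)
    for (int y = 0; y < height; ++y) {
        run(0, y, width);                                     // y is passed in per row
    }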
@@ -78,8 +78,7 @@ private:
     void*       fDstPtr          = nullptr;
     const void* fMaskPtr         = nullptr;
     float       fCurrentCoverage = 0.0f;
-    int         fCurrentY        = 0;
-    SkJumper_DitherCtx fDitherCtx = { &fCurrentY, 0.0f };
+    float       fDitherRate      = 0.0f;
 
     std::vector<SkPM4f> fShaderBuffer;
@@ -180,7 +179,7 @@ SkBlitter* SkRasterPipelineBlitter::Create(const SkPixmap& dst,
     } else {
         // If the shader's not constant, it'll need seeding with x,y.
         if (!is_constant) {
-            colorPipeline->append(SkRasterPipeline::seed_shader, &blitter->fCurrentY);
+            colorPipeline->append(SkRasterPipeline::seed_shader);
         }
         colorPipeline->extend(shaderPipeline);
     }
@@ -197,13 +196,13 @@ SkBlitter* SkRasterPipelineBlitter::Create(const SkPixmap& dst,
     if (wants_dither ||
         (paint.isDither() && dst.info().colorType() == kRGB_565_SkColorType)) {
         switch (dst.info().colorType()) {
-            default:                     blitter->fDitherCtx.rate =     0.0f; break;
-            case kRGB_565_SkColorType:   blitter->fDitherCtx.rate =  1/63.0f; break;
+            default:                     blitter->fDitherRate =     0.0f; break;
+            case kRGB_565_SkColorType:   blitter->fDitherRate =  1/63.0f; break;
             case kRGBA_8888_SkColorType:
-            case kBGRA_8888_SkColorType: blitter->fDitherCtx.rate = 1/255.0f; break;
+            case kBGRA_8888_SkColorType: blitter->fDitherRate = 1/255.0f; break;
         }
     }
-    is_constant = is_constant && (blitter->fDitherCtx.rate == 0.0f);
+    is_constant = is_constant && (blitter->fDitherRate == 0.0f);
 
     // We're logically done here. The code between here and return blitter is all optimization.
@@ -263,10 +262,10 @@ void SkRasterPipelineBlitter::append_store(SkRasterPipeline* p) const {
     if (fDst.info().gammaCloseToSRGB()) {
         p->append(SkRasterPipeline::to_srgb);
     }
-    if (fDitherCtx.rate > 0.0f) {
+    if (fDitherRate > 0.0f) {
         // We dither after any sRGB transfer function to make sure our 1/255.0f is sensible
         // over the whole range. If we did it before, 1/255.0f is too big a rate near zero.
-        p->append(SkRasterPipeline::dither, &fDitherCtx);
+        p->append(SkRasterPipeline::dither, &fDitherRate);
     }
 
     if (fDst.info().colorType() == kBGRA_8888_SkColorType) {
@@ -305,7 +304,6 @@ void SkRasterPipelineBlitter::maybe_shade(int x, int y, int w) {
 
 void SkRasterPipelineBlitter::blitH(int x, int y, int w) {
     fDstPtr = fDst.writable_addr(0,y);
-    fCurrentY = y;
 
     if (fCanMemsetInBlitH) {
         switch (fDst.shiftPerPixel()) {
@@ -323,7 +321,7 @@ void SkRasterPipelineBlitter::blitH(int x, int y, int w) {
     if (fBlend == SkBlendMode::kSrcOver
             && fDst.info().colorType() == kRGBA_8888_SkColorType
             && !fDst.colorSpace()
-            && fDitherCtx.rate == 0.0f) {
+            && fDitherRate == 0.0f) {
         p.append(SkRasterPipeline::srcover_rgba_8888, &fDstPtr);
     } else {
         if (fBlend != SkBlendMode::kSrc) {
@@ -358,7 +356,6 @@ void SkRasterPipelineBlitter::blitAntiH(int x, int y, const SkAlpha aa[], const
     }
 
     fDstPtr = fDst.writable_addr(0,y);
-    fCurrentY = y;
     for (int16_t run = *runs; run > 0; run = *runs) {
         switch (*aa) {
             case 0x00: break;
@@ -411,7 +408,6 @@ void SkRasterPipelineBlitter::blitMask(const SkMask& mask, const SkIRect& clip)
     int x = clip.left();
     for (int y = clip.top(); y < clip.bottom(); y++) {
         fDstPtr = fDst.writable_addr(0,y);
-        fCurrentY = y;
 
         this->maybe_shade(x,y,clip.width());
         switch (mask.fFormat) {
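fCurrentY existed only so fDitherCtx (and previously seed_shader's context) could point at it, and every blit method had to refresh it before running the pipeline. With y plumbed through as an argument, that bookkeeping disappears; a hypothetical stripped-down sketch of the after state (illustrative, not the actual blitter code):

    void blitH(int x, int y, int w) {
        fDstPtr = fDst.writable_addr(0, y);
        // ... append stages / reuse the cached pipeline ...
        run(x, y, w);   // y reaches seed_shader and dither as a register argument
    }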
@@ -43,24 +43,24 @@ extern "C" {
     // We'll just run portable code.
 
 #elif defined(__aarch64__)
-    void ASM(start_pipeline,aarch64)(size_t, void**, K*, size_t);
+    void ASM(start_pipeline,aarch64)(size_t,size_t,size_t, void**, K*);
     StageFn ASM(just_return,aarch64);
     #define M(st) StageFn ASM(st,aarch64);
         SK_RASTER_PIPELINE_STAGES(M)
     #undef M
 
 #elif defined(__arm__)
-    void ASM(start_pipeline,vfp4)(size_t, void**, K*, size_t);
+    void ASM(start_pipeline,vfp4)(size_t,size_t,size_t, void**, K*);
     StageFn ASM(just_return,vfp4);
     #define M(st) StageFn ASM(st,vfp4);
         SK_RASTER_PIPELINE_STAGES(M)
     #undef M
 
 #elif defined(__x86_64__) || defined(_M_X64)
-    void ASM(start_pipeline,hsw  )(size_t, void**, K*, size_t);
-    void ASM(start_pipeline,avx  )(size_t, void**, K*, size_t);
-    void ASM(start_pipeline,sse41)(size_t, void**, K*, size_t);
-    void ASM(start_pipeline,sse2 )(size_t, void**, K*, size_t);
+    void ASM(start_pipeline,hsw  )(size_t,size_t,size_t, void**, K*);
+    void ASM(start_pipeline,avx  )(size_t,size_t,size_t, void**, K*);
+    void ASM(start_pipeline,sse41)(size_t,size_t,size_t, void**, K*);
+    void ASM(start_pipeline,sse2 )(size_t,size_t,size_t, void**, K*);
 
     StageFn ASM(just_return,hsw),
             ASM(just_return,avx),
@@ -82,7 +82,7 @@ extern "C" {
 #endif
 
     // Portable, single-pixel stages.
-    void sk_start_pipeline(size_t, void**, K*, size_t);
+    void sk_start_pipeline(size_t,size_t,size_t, void**, K*);
     StageFn sk_just_return;
     #define M(st) StageFn sk_##st;
         SK_RASTER_PIPELINE_STAGES(M)
@@ -96,7 +96,7 @@ static const int kNumStages = SK_RASTER_PIPELINE_STAGES(M);
 // Engines comprise everything we need to run SkRasterPipelines.
 struct SkJumper_Engine {
     StageFn* stages[kNumStages];
-    void (*start_pipeline)(size_t, void**, K*, size_t);
+    void (*start_pipeline)(size_t,size_t,size_t, void**, K*);
     StageFn* just_return;
 };
 
@@ -192,10 +192,9 @@ void SkRasterPipeline::run(size_t x, size_t y, size_t n) const {
 
     // Best to not use fAlloc here... we can't bound how often run() will be called.
     SkAutoSTMalloc<64, void*> program(fSlotsNeeded);
-    const size_t limit = x+n;
 
     BuildPipeline(fStages, gEngine, program.get() + fSlotsNeeded);
-    gEngine.start_pipeline(x, program.get(), &kConstants, limit);
+    gEngine.start_pipeline(x,y,x+n, program.get(), &kConstants);
 }
 
 std::function<void(size_t, size_t, size_t)> SkRasterPipeline::compile() const {
@@ -208,7 +207,6 @@ std::function<void(size_t, size_t, size_t)> SkRasterPipeline::compile() const {
     BuildPipeline(fStages, gEngine, program + fSlotsNeeded);
 
     return [=](size_t x, size_t y, size_t n) {
-        const size_t limit = x+n;
-        gEngine.start_pipeline(x, program, &kConstants, limit);
+        gEngine.start_pipeline(x,y,x+n, program, &kConstants);
     };
 }
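A quick worked example of the new entry point, using the signatures above: run(x, y, n) covers the half-open pixel span [x, x+n) on row y, so the old trailing limit parameter is now just computed inline:

    pipeline.run(/*x=*/16, /*y=*/5, /*n=*/100);
    // forwards to: gEngine.start_pipeline(16, 5, 116, program.get(), &kConstants);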
@@ -95,11 +95,6 @@ struct SkJumper_ParametricTransferFunction {
     float G, A,B,C,D,E,F;
 };
 
-struct SkJumper_DitherCtx {
-    const int* y;
-    float rate;
-};
-
 struct SkJumper_GradientCtx {
     size_t stopCount;
     float* fs[4];
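With y delivered as a stage argument, SkJumper_DitherCtx had exactly one payload field left, so the struct is deleted and the stage now reinterprets its context pointer as a bare float (see the SkJumper_stages.cpp hunk below):

    auto rate = *(const float*)ctx;   // was: ((const SkJumper_DitherCtx*)ctx)->rate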
Two file diffs suppressed because they are too large.
@@ -77,38 +77,40 @@ struct LazyCtx {
 // tail == 0 ~~> work on a full kStride pixels
 // tail != 0 ~~> work on only the first tail pixels
 // tail is always < kStride.
-using Stage = void(size_t x, void** program, K* k, size_t tail, F,F,F,F, F,F,F,F);
+//
+// We keep program the second argument, so that it's passed in rsi for load_and_inc().
+using Stage = void(K* k, void** program, size_t x, size_t y, size_t tail, F,F,F,F, F,F,F,F);
 
 MAYBE_MSABI
-extern "C" void WRAP(start_pipeline)(size_t x, void** program, K* k, size_t limit) {
+extern "C" void WRAP(start_pipeline)(size_t x, size_t y, size_t limit, void** program, K* k) {
     F v{};
     auto start = (Stage*)load_and_inc(program);
     while (x + kStride <= limit) {
-        start(x,program,k,0,    v,v,v,v, v,v,v,v);
+        start(k,program,x,y,0,    v,v,v,v, v,v,v,v);
         x += kStride;
     }
     if (size_t tail = limit - x) {
-        start(x,program,k,tail, v,v,v,v, v,v,v,v);
+        start(k,program,x,y,tail, v,v,v,v, v,v,v,v);
     }
 }
 
-#define STAGE(name)                                                          \
-    SI void name##_k(size_t x, LazyCtx ctx, K* k, size_t tail,               \
-                     F& r, F& g, F& b, F& a, F& dr, F& dg, F& db, F& da);    \
-    extern "C" void WRAP(name)(size_t x, void** program, K* k, size_t tail,  \
-                               F r, F g, F b, F a, F dr, F dg, F db, F da) { \
-        LazyCtx ctx(program);                                                \
-        name##_k(x,ctx,k,tail, r,g,b,a, dr,dg,db,da);                        \
-        auto next = (Stage*)load_and_inc(program);                           \
-        next(x,program,k,tail, r,g,b,a, dr,dg,db,da);                        \
-    }                                                                        \
-    SI void name##_k(size_t x, LazyCtx ctx, K* k, size_t tail,               \
+#define STAGE(name)                                                                   \
+    SI void name##_k(K* k, LazyCtx ctx, size_t x, size_t y, size_t tail,              \
+                     F& r, F& g, F& b, F& a, F& dr, F& dg, F& db, F& da);             \
+    extern "C" void WRAP(name)(K* k, void** program, size_t x, size_t y, size_t tail, \
+                               F r, F g, F b, F a, F dr, F dg, F db, F da) {          \
+        LazyCtx ctx(program);                                                         \
+        name##_k(k,ctx,x,y,tail, r,g,b,a, dr,dg,db,da);                               \
+        auto next = (Stage*)load_and_inc(program);                                    \
+        next(k,program,x,y,tail, r,g,b,a, dr,dg,db,da);                               \
+    }                                                                                 \
+    SI void name##_k(K* k, LazyCtx ctx, size_t x, size_t y, size_t tail,              \
                      F& r, F& g, F& b, F& a, F& dr, F& dg, F& db, F& da)
 
 
 // just_return() is a simple no-op stage that only exists to end the chain,
 // returning back up to start_pipeline(), and from there to the caller.
-extern "C" void WRAP(just_return)(size_t, void**, K*, F,F,F,F, F,F,F,F) {}
+extern "C" void WRAP(just_return)(K*, void**, size_t,size_t,size_t, F,F,F,F, F,F,F,F) {}
 
 
 // We could start defining normal Stages now. But first, some helper functions.
@@ -242,8 +244,6 @@ SI U32 ix_and_ptr(T** ptr, const SkJumper_GatherCtx* ctx, F x, F y) {
 // Now finally, normal Stages!
 
 STAGE(seed_shader) {
-    auto y = *(const int*)ctx;
-
     // It's important for speed to explicitly cast(x) and cast(y),
     // which has the effect of splatting them to vectors before converting to floats.
     // On Intel this breaks a data dependency on previous loop iterations' registers.
@@ -255,11 +255,11 @@ STAGE(seed_shader) {
 }
 
 STAGE(dither) {
-    auto c = (const SkJumper_DitherCtx*)ctx;
+    auto rate = *(const float*)ctx;
 
     // Get [(x,y), (x+1,y), (x+2,y), ...] loaded up in integer vectors.
     U32 X = x + unaligned_load<U32>(k->iota_U32),
-        Y = (uint32_t)*c->y;
+        Y = y;
 
     // We're doing 8x8 ordered dithering, see https://en.wikipedia.org/wiki/Ordered_dithering.
     // In this case n=8 and we're using the matrix that looks like 1/64 x [ 0 48 12 60 ... ].
@@ -278,9 +278,9 @@ STAGE(dither) {
     // like 0 and 1 unchanged after rounding.
     F dither = cast(M) * (2/128.0f) - (63/128.0f);
 
-    r += c->rate*dither;
-    g += c->rate*dither;
-    b += c->rate*dither;
+    r += rate*dither;
+    g += rate*dither;
+    b += rate*dither;
 
     r = max(0, min(r, a));
     g = max(0, min(g, a));
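Since the middle of the dither stage is elided above: M is an 8x8 ordered-dither (Bayer) index in [0,63]. Here is a scalar sketch of one standard way to compute it, matching the matrix the comment names (first row 0, 48, 12, 60, ...); this is an illustration, not code copied from the diff, and the real stage does the equivalent with integer vectors:

    #include <cstdint>

    // Classic 8x8 Bayer index: interleave the low three bits of x and x^y.
    static uint32_t bayer8x8(uint32_t x, uint32_t y) {
        uint32_t m = x ^ y;
        return (m & 1) << 5 | (x & 1) << 4    // bayer8x8(1,0) == 48
             | (m & 2) << 2 | (x & 2) << 1    // bayer8x8(2,0) == 12
             | (m & 4) >> 1 | (x & 4) >> 2;   // bayer8x8(3,0) == 60
    }

The stage then recenters M to roughly [-0.5, +0.5) as (2*M - 63)/128 (the "F dither = cast(M) * (2/128.0f) - (63/128.0f)" line above) and scales it by the per-format rate before adding it to r, g, and b.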