composeshader stages

needed to add two helper stages for composeshader

load_rgba, store_rgba

These just read/write the r,g,b,a registers to context memory, making no promise as to how the
memory is formatted (e.g. interleaved -vs- planar).

Note that we have similar existing stages, but they did not seem to suit:

constant_color

This guy loads 4 floats from memory, and splats them into registers. I need to load 4 entire
registers.

load_f32, store_f32

These offset where they read/write based on the 'x' register, plus they guarantee that the memory
will be interleaved à la SkPM4f.

Bug: skia:
Change-Id: Iaa81f950660b837bdb34416ab3e342d56a92239b
Reviewed-on: https://skia-review.googlesource.com/16716
Reviewed-by: Mike Klein <mtklein@chromium.org>
Commit-Queue: Mike Reed <reed@google.com>
This commit is contained in:
Mike Reed 2017-05-15 09:34:22 -04:00 committed by Skia Commit-Bot
parent 787a16dd9e
commit 9959f723c3
6 changed files with 3596 additions and 3309 deletions

View File

@ -6,13 +6,16 @@
*/
#include "SkArenaAlloc.h"
#include "SkBlendModePriv.h"
#include "SkComposeShader.h"
#include "SkColorFilter.h"
#include "SkColorPriv.h"
#include "SkColorShader.h"
#include "SkRasterPipeline.h"
#include "SkReadBuffer.h"
#include "SkWriteBuffer.h"
#include "SkString.h"
#include "../jumper/SkJumper.h"
sk_sp<SkShader> SkShader::MakeComposeShader(sk_sp<SkShader> dst, sk_sp<SkShader> src,
SkBlendMode mode) {
@ -117,6 +120,45 @@ bool SkComposeShader::asACompose(ComposeRec* rec) const {
return true;
}
// Builds the raster pipeline for the composed shader: run fShaderB (src),
// stash its output, run fShaderA (dst), then blend the two with fMode.
// Returns false if either child shader cannot append its stages.
bool SkComposeShader::onAppendStages(SkRasterPipeline* pipeline, SkColorSpace* dstCS,
                                     SkArenaAlloc* alloc, const SkMatrix& ctm,
                                     const SkPaint& paint, const SkMatrix* localM) const {
    // Scratch buffers, arena-allocated so they live as long as the pipeline.
    struct Storage {
        float fXY[4 * SkJumper_kMaxStride];
        float fRGBA[4 * SkJumper_kMaxStride];
        float fAlpha;
    };
    Storage* scratch = alloc->make<Storage>();

    // The device x,y (this shader's inputs) are about to be clobbered by the
    // first child shader, so park all four registers now; store_rgba saves
    // r,g,b,a even though only r,g (x,y) are actually needed later.
    pipeline->append(SkRasterPipeline::store_rgba, scratch->fXY);

    if (!fShaderB->appendStages(pipeline, dstCS, alloc, ctm, paint, localM)) {  // SRC
        return false;
    }
    // fShaderB's output (the src color) must survive fShaderA overwriting
    // r,g,b,a, so park it until blend time.
    pipeline->append(SkRasterPipeline::store_rgba, scratch->fRGBA);

    // Bring back device x,y for the second shader.
    pipeline->append(SkRasterPipeline::load_rgba, scratch->fXY);

    if (!fShaderA->appendStages(pipeline, dstCS, alloc, ctm, paint, localM)) {  // DST
        return false;
    }

    // The logical 'dst' now sits in r,g,b,a, but the blend stages expect it in
    // dr,dg,db,da — shuttle it over, then reload the saved src color. (A stage
    // that could load directly into dst would let us swap the two shader
    // invocations and skip this move.)
    pipeline->append(SkRasterPipeline::move_src_dst);
    pipeline->append(SkRasterPipeline::load_rgba, scratch->fRGBA);

    // Idea: should time this, and see if it helps to have custom versions of
    // the overflow modes that do their own clamping, avoiding the overhead of
    // an extra stage.
    SkBlendMode_AppendStages(fMode, pipeline);
    if (SkBlendMode_CanOverflow(fMode)) {
        pipeline->append(SkRasterPipeline::clamp_a);
    }
    return true;
}
// larger is better (fewer times we have to loop), but we shouldn't
// take up too much stack-space (each element is 4 bytes)

View File

@ -72,6 +72,8 @@ protected:
void flatten(SkWriteBuffer&) const override;
Context* onMakeContext(const ContextRec&, SkArenaAlloc*) const override;
sk_sp<SkShader> onMakeColorSpace(SkColorSpaceXformer* xformer) const override;
bool onAppendStages(SkRasterPipeline*, SkColorSpace* dstCS, SkArenaAlloc*,
const SkMatrix&, const SkPaint&, const SkMatrix* localM) const override;
private:
sk_sp<SkShader> fShaderA;

View File

@ -72,7 +72,7 @@
M(load_8888) M(store_8888) \
M(load_u16_be) M(load_rgb_u16_be) M(store_u16_be) \
M(load_tables_u16_be) M(load_tables_rgb_u16_be) \
M(load_tables) \
M(load_tables) M(load_rgba) M(store_rgba) \
M(scale_u8) M(scale_1_float) \
M(lerp_u8) M(lerp_565) M(lerp_1_float) \
M(dstatop) M(dstin) M(dstout) M(dstover) \

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -322,6 +322,7 @@ STAGE(dither) {
b += c->rate*dither;
}
// load 4 floats from memory, and splat them into r,g,b,a
STAGE(constant_color) {
auto rgba = (const float*)ctx;
r = rgba[0];
@ -330,6 +331,24 @@ STAGE(constant_color) {
a = rgba[3];
}
// Restore registers r,g,b,a from context memory written by store_rgba:
// four consecutive blocks of kStride floats, one block per channel.
STAGE(load_rgba) {
    auto src = (const float*)ctx;
    r = unaligned_load<F>(src + 0*kStride);
    g = unaligned_load<F>(src + 1*kStride);
    b = unaligned_load<F>(src + 2*kStride);
    a = unaligned_load<F>(src + 3*kStride);
}
// Spill registers r,g,b,a into context memory (read back by load_rgba):
// four consecutive blocks of kStride floats, one block per channel.
STAGE(store_rgba) {
    auto dst = (float*)ctx;
    memcpy(dst + 0*kStride, &r, sizeof(F));
    memcpy(dst + 1*kStride, &g, sizeof(F));
    memcpy(dst + 2*kStride, &b, sizeof(F));
    memcpy(dst + 3*kStride, &a, sizeof(F));
}
// Most blend modes apply the same logic to each channel.
#define BLEND_MODE(name) \
SI F name##_channel(F s, F d, F sa, F da); \