SkRasterPipeline refactor
- Give body and tail functions separate types. This frees a register in body functions, especially important for Windows.
- Fill out default, SSE4.1, and HSW versions of all functions. This means we don't have to mess around with SkNf_abi... all functions come from the same compilation unit where SkNf is a single consistent type.
- Move Stage::next() into SkRasterPipeline_opts.h as a static inline function.
- Remove Stage::ctx() entirely... fCtx is literally the same thing.

This is a step along the way toward building the entire pipeline in src/opts, removing the need for all the stages to be functions living in SkOpts.

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=3680
CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot,Test-Ubuntu-Clang-GCE-CPU-AVX2-x86_64-Debug-ASAN-Trybot
Change-Id: I7de78ffebc15b9bad4eda187c9f50369cd7e5e42
Reviewed-on: https://skia-review.googlesource.com/3680
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
This commit is contained in:
parent
d2fe3bce07
commit
2878e76247
@ -144,8 +144,8 @@ namespace SkOpts {
|
||||
static_assert(SK_ARRAY_COUNT(body) == SkRasterPipeline::kNumStockStages, "");
|
||||
|
||||
SkOpts::VoidFn tail[] = {
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::just_return,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::swap_src_dst,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::just_return_tail,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::swap_src_dst_tail,
|
||||
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::store_565_tail,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::store_srgb_tail,
|
||||
@ -163,34 +163,34 @@ namespace SkOpts {
|
||||
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::lerp_u8_tail,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::lerp_565_tail,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::lerp_constant_float,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::lerp_constant_float_tail,
|
||||
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::constant_color,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::constant_color_tail,
|
||||
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::dst,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::dstatop,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::dstin,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::dstout,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::dstover,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::srcatop,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::srcin,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::srcout,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::srcover,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::clear,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::modulate,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::multiply,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::plus_,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::screen,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::xor_,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::colorburn,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::colordodge,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::darken,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::difference,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::exclusion,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::hardlight,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::lighten,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::overlay,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::softlight,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::dst_tail,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::dstatop_tail,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::dstin_tail,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::dstout_tail,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::dstover_tail,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::srcatop_tail,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::srcin_tail,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::srcout_tail,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::srcover_tail,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::clear_tail,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::modulate_tail,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::multiply_tail,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::plus__tail,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::screen_tail,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::xor__tail,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::colorburn_tail,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::colordodge_tail,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::darken_tail,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::difference_tail,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::exclusion_tail,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::hardlight_tail,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::lighten_tail,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::overlay_tail,
|
||||
(SkOpts::VoidFn)SK_OPTS_NS::softlight_tail,
|
||||
};
|
||||
static_assert(SK_ARRAY_COUNT(tail) == SkRasterPipeline::kNumStockStages, "");
|
||||
|
||||
|
@ -55,33 +55,13 @@
|
||||
|
||||
class SkRasterPipeline {
|
||||
public:
|
||||
struct Stage;
|
||||
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
|
||||
using V = SkNx_abi<8,float>;
|
||||
#else
|
||||
using V = SkNx_abi<4,float>;
|
||||
#endif
|
||||
using Fn = void(SK_VECTORCALL *)(Stage*, size_t, size_t, V,V,V,V,
|
||||
V,V,V,V);
|
||||
|
||||
struct Stage {
|
||||
template <typename T>
|
||||
T ctx() { return static_cast<T>(fCtx); }
|
||||
|
||||
void SK_VECTORCALL next(size_t x, size_t tail, V v0, V v1, V v2, V v3,
|
||||
V v4, V v5, V v6, V v7) {
|
||||
// Stages are logically a pipeline, and physically are contiguous in an array.
|
||||
// To get to the next stage, we just increment our pointer to the next array element.
|
||||
((Fn)fNext)(this+1, x,tail, v0,v1,v2,v3, v4,v5,v6,v7);
|
||||
}
|
||||
|
||||
// It makes next() a good bit cheaper if we hold the next function to call here,
|
||||
// rather than logically simpler choice of the function implementing this stage.
|
||||
void (*fNext)();
|
||||
void* fCtx;
|
||||
};
|
||||
|
||||
|
||||
SkRasterPipeline();
|
||||
|
||||
// Run the pipeline constructed with append(), walking x through [x,x+n),
|
||||
|
@ -35,11 +35,6 @@ namespace SkOpts {
|
||||
|
||||
STAGE(lerp_u8);
|
||||
STAGE(lerp_565);
|
||||
#undef STAGE
|
||||
|
||||
#define STAGE(stage) \
|
||||
body[SkRasterPipeline::stage] = (SkOpts::VoidFn)SK_OPTS_NS::stage; \
|
||||
tail[SkRasterPipeline::stage] = (SkOpts::VoidFn)SK_OPTS_NS::stage
|
||||
|
||||
STAGE(just_return);
|
||||
STAGE(swap_src_dst);
|
||||
|
@ -21,7 +21,7 @@ namespace SkOpts {
|
||||
srcover_srgb_srgb = sse41::srcover_srgb_srgb;
|
||||
blit_row_s32a_opaque = sse41::blit_row_s32a_opaque;
|
||||
|
||||
#define STAGE(stage) \
|
||||
#define STAGE(stage) \
|
||||
body[SkRasterPipeline::stage] = (SkOpts::VoidFn)SK_OPTS_NS::stage; \
|
||||
tail[SkRasterPipeline::stage] = (SkOpts::VoidFn)SK_OPTS_NS::stage##_tail
|
||||
|
||||
@ -41,40 +41,34 @@ namespace SkOpts {
|
||||
|
||||
STAGE(lerp_u8);
|
||||
STAGE(lerp_565);
|
||||
#undef STAGE
|
||||
|
||||
#define STAGE(stage) \
|
||||
body[SkRasterPipeline::stage] = (SkOpts::VoidFn)SK_OPTS_NS::stage; \
|
||||
tail[SkRasterPipeline::stage] = (SkOpts::VoidFn)SK_OPTS_NS::stage
|
||||
STAGE(just_return);
|
||||
STAGE(swap_src_dst);
|
||||
STAGE(lerp_constant_float);
|
||||
STAGE(constant_color);
|
||||
|
||||
// The commented-out stages don't actually benefit from SSE 4.1.
|
||||
// To cut down on code bloat we skip them here, using the identical SSE2 defaults.
|
||||
|
||||
//STAGE(lerp_constant_float);
|
||||
//STAGE(constant_color);
|
||||
|
||||
//STAGE(dst);
|
||||
//STAGE(dstatop);
|
||||
//STAGE(dstin);
|
||||
//STAGE(dstout);
|
||||
//STAGE(dstover);
|
||||
//STAGE(srcatop);
|
||||
//STAGE(srcin);
|
||||
//STAGE(srcout);
|
||||
//STAGE(srcover);
|
||||
//STAGE(clear);
|
||||
//STAGE(modulate);
|
||||
//STAGE(multiply);
|
||||
//STAGE(plus_);
|
||||
//STAGE(screen);
|
||||
//STAGE(xor_);
|
||||
STAGE(dst);
|
||||
STAGE(dstatop);
|
||||
STAGE(dstin);
|
||||
STAGE(dstout);
|
||||
STAGE(dstover);
|
||||
STAGE(srcatop);
|
||||
STAGE(srcin);
|
||||
STAGE(srcout);
|
||||
STAGE(srcover);
|
||||
STAGE(clear);
|
||||
STAGE(modulate);
|
||||
STAGE(multiply);
|
||||
STAGE(plus_);
|
||||
STAGE(screen);
|
||||
STAGE(xor_);
|
||||
STAGE(colorburn);
|
||||
STAGE(colordodge);
|
||||
//STAGE(darken);
|
||||
//STAGE(difference);
|
||||
//STAGE(exclusion);
|
||||
STAGE(darken);
|
||||
STAGE(difference);
|
||||
STAGE(exclusion);
|
||||
STAGE(hardlight);
|
||||
//STAGE(lighten);
|
||||
STAGE(lighten);
|
||||
STAGE(overlay);
|
||||
STAGE(softlight);
|
||||
#undef STAGE
|
||||
|
@ -12,36 +12,53 @@
|
||||
#include "SkPM4f.h"
|
||||
#include "SkRasterPipeline.h"
|
||||
#include "SkSRGB.h"
|
||||
#include <utility>
|
||||
|
||||
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
|
||||
static constexpr int N = 8;
|
||||
#else
|
||||
static constexpr int N = 4;
|
||||
#endif
|
||||
|
||||
using SkNf_abi = SkRasterPipeline::V;
|
||||
static constexpr auto N = sizeof(SkNf_abi) / sizeof(float);
|
||||
using SkNf = SkNx<N, float>;
|
||||
using SkNi = SkNx<N, int>;
|
||||
using SkNh = SkNx<N, uint16_t>;
|
||||
|
||||
using Body = void(SK_VECTORCALL *)(SkRasterPipeline::Stage*, size_t,
|
||||
SkNf,SkNf,SkNf,SkNf,
|
||||
SkNf,SkNf,SkNf,SkNf);
|
||||
using Tail = void(SK_VECTORCALL *)(SkRasterPipeline::Stage*, size_t, size_t,
|
||||
SkNf,SkNf,SkNf,SkNf,
|
||||
SkNf,SkNf,SkNf,SkNf);
|
||||
|
||||
#define SI static inline
|
||||
|
||||
template <typename Fn, typename... Args>
|
||||
SI void next(SkRasterPipeline::Stage* st, Args&&... args) {
|
||||
// Stages are logically a pipeline, and physically are contiguous in an array.
|
||||
// To get to the next stage, we just increment our pointer to the next array element.
|
||||
((Fn)st->fNext)(st+1, std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
#define STAGE(name, kCallNext) \
|
||||
template <bool kIsTail> \
|
||||
static SK_ALWAYS_INLINE void name##_kernel(void* ctx, size_t x, size_t tail, \
|
||||
SkNf& r, SkNf& g, SkNf& b, SkNf& a, \
|
||||
SkNf& dr, SkNf& dg, SkNf& db, SkNf& da); \
|
||||
SI void SK_VECTORCALL name(SkRasterPipeline::Stage* st, size_t x, size_t tail, \
|
||||
SkNf_abi R, SkNf_abi G, SkNf_abi B, SkNf_abi A, \
|
||||
SkNf_abi DR, SkNf_abi DG, SkNf_abi DB, SkNf_abi DA) { \
|
||||
SkNf r=R,g=G,b=B,a=A, dr=DR,dg=DG,db=DB,da=DA; \
|
||||
name##_kernel<false>(st->ctx<void*>(), x,0, r,g,b,a, dr,dg,db,da); \
|
||||
SI void SK_VECTORCALL name(SkRasterPipeline::Stage* st, size_t x, \
|
||||
SkNf r, SkNf g, SkNf b, SkNf a, \
|
||||
SkNf dr, SkNf dg, SkNf db, SkNf da) { \
|
||||
name##_kernel<false>(st->fCtx, x,0, r,g,b,a, dr,dg,db,da); \
|
||||
if (kCallNext) { \
|
||||
st->next(x,tail, r,g,b,a, dr,dg,db,da); \
|
||||
next<Body>(st, x, r,g,b,a, dr,dg,db,da); \
|
||||
} \
|
||||
} \
|
||||
SI void SK_VECTORCALL name##_tail(SkRasterPipeline::Stage* st, size_t x, size_t tail, \
|
||||
SkNf_abi R, SkNf_abi G, SkNf_abi B, SkNf_abi A, \
|
||||
SkNf_abi DR, SkNf_abi DG, SkNf_abi DB, SkNf_abi DA) { \
|
||||
SkNf r=R,g=G,b=B,a=A, dr=DR,dg=DG,db=DB,da=DA; \
|
||||
name##_kernel<true>(st->ctx<void*>(), x,tail, r,g,b,a, dr,dg,db,da); \
|
||||
SkNf r, SkNf g, SkNf b, SkNf a, \
|
||||
SkNf dr, SkNf dg, SkNf db, SkNf da) { \
|
||||
name##_kernel<true>(st->fCtx, x,tail, r,g,b,a, dr,dg,db,da); \
|
||||
if (kCallNext) { \
|
||||
st->next(x,tail, r,g,b,a, dr,dg,db,da); \
|
||||
next<Tail>(st, x,tail, r,g,b,a, dr,dg,db,da); \
|
||||
} \
|
||||
} \
|
||||
template <bool kIsTail> \
|
||||
@ -54,15 +71,23 @@ using SkNh = SkNx<N, uint16_t>;
|
||||
#define RGBA_XFERMODE(name) \
|
||||
static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \
|
||||
const SkNf& d, const SkNf& da); \
|
||||
SI void SK_VECTORCALL name(SkRasterPipeline::Stage* st, size_t x, size_t tail, \
|
||||
SkNf_abi R, SkNf_abi G, SkNf_abi B, SkNf_abi A, \
|
||||
SkNf_abi DR, SkNf_abi DG, SkNf_abi DB, SkNf_abi DA) { \
|
||||
SkNf r=R,g=G,b=B,a=A, dr=DR,dg=DG,db=DB,da=DA; \
|
||||
SI void SK_VECTORCALL name(SkRasterPipeline::Stage* st, size_t x, \
|
||||
SkNf r, SkNf g, SkNf b, SkNf a, \
|
||||
SkNf dr, SkNf dg, SkNf db, SkNf da) { \
|
||||
r = name##_kernel(r,a,dr,da); \
|
||||
g = name##_kernel(g,a,dg,da); \
|
||||
b = name##_kernel(b,a,db,da); \
|
||||
a = name##_kernel(a,a,da,da); \
|
||||
st->next(x,tail, r,g,b,a, dr,dg,db,da); \
|
||||
next<Body>(st, x, r,g,b,a, dr,dg,db,da); \
|
||||
} \
|
||||
SI void SK_VECTORCALL name##_tail(SkRasterPipeline::Stage* st, size_t x, size_t tail, \
|
||||
SkNf r, SkNf g, SkNf b, SkNf a, \
|
||||
SkNf dr, SkNf dg, SkNf db, SkNf da) { \
|
||||
r = name##_kernel(r,a,dr,da); \
|
||||
g = name##_kernel(g,a,dg,da); \
|
||||
b = name##_kernel(b,a,db,da); \
|
||||
a = name##_kernel(a,a,da,da); \
|
||||
next<Tail>(st, x,tail, r,g,b,a, dr,dg,db,da); \
|
||||
} \
|
||||
static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \
|
||||
const SkNf& d, const SkNf& da)
|
||||
@ -71,15 +96,23 @@ using SkNh = SkNx<N, uint16_t>;
|
||||
#define RGB_XFERMODE(name) \
|
||||
static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \
|
||||
const SkNf& d, const SkNf& da); \
|
||||
SI void SK_VECTORCALL name(SkRasterPipeline::Stage* st, size_t x, size_t tail, \
|
||||
SkNf_abi R, SkNf_abi G, SkNf_abi B, SkNf_abi A, \
|
||||
SkNf_abi DR, SkNf_abi DG, SkNf_abi DB, SkNf_abi DA) { \
|
||||
SkNf r=R,g=G,b=B,a=A, dr=DR,dg=DG,db=DB,da=DA; \
|
||||
SI void SK_VECTORCALL name(SkRasterPipeline::Stage* st, size_t x, \
|
||||
SkNf r, SkNf g, SkNf b, SkNf a, \
|
||||
SkNf dr, SkNf dg, SkNf db, SkNf da) { \
|
||||
r = name##_kernel(r,a,dr,da); \
|
||||
g = name##_kernel(g,a,dg,da); \
|
||||
b = name##_kernel(b,a,db,da); \
|
||||
a = a + (da * (1.0f-a)); \
|
||||
st->next(x,tail, r,g,b,a, dr,dg,db,da); \
|
||||
next<Body>(st, x, r,g,b,a, dr,dg,db,da); \
|
||||
} \
|
||||
SI void SK_VECTORCALL name##_tail(SkRasterPipeline::Stage* st, size_t x, size_t tail, \
|
||||
SkNf r, SkNf g, SkNf b, SkNf a, \
|
||||
SkNf dr, SkNf dg, SkNf db, SkNf da) { \
|
||||
r = name##_kernel(r,a,dr,da); \
|
||||
g = name##_kernel(g,a,dg,da); \
|
||||
b = name##_kernel(b,a,db,da); \
|
||||
a = a + (da * (1.0f-a)); \
|
||||
next<Tail>(st, x,tail, r,g,b,a, dr,dg,db,da); \
|
||||
} \
|
||||
static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \
|
||||
const SkNf& d, const SkNf& da)
|
||||
@ -90,11 +123,11 @@ namespace SK_OPTS_NS {
|
||||
SI void run_pipeline(size_t x, size_t n,
|
||||
void (*vBodyStart)(), SkRasterPipeline::Stage* body,
|
||||
void (*vTailStart)(), SkRasterPipeline::Stage* tail) {
|
||||
auto bodyStart = (SkRasterPipeline::Fn)vBodyStart,
|
||||
tailStart = (SkRasterPipeline::Fn)vTailStart;
|
||||
SkNf v{0}; // TODO: uninitialized would be a bit faster, but some compilers are whiny.
|
||||
auto bodyStart = (Body)vBodyStart;
|
||||
auto tailStart = (Tail)vTailStart;
|
||||
SkNf v; // Fastest to start uninitialized.
|
||||
while (n >= N) {
|
||||
bodyStart(body, x,0, v,v,v,v, v,v,v,v);
|
||||
bodyStart(body, x, v,v,v,v, v,v,v,v);
|
||||
x += N;
|
||||
n -= N;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user