SkNx_abi for passing Sk4f as function arguments, etc.
CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot,Test-Ubuntu-Clang-GCE-CPU-AVX2-x86_64-Debug-ASAN-Trybot
BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=3422
Change-Id: Idc0a192faa7ff843aef023229186580c69baf1f7
Reviewed-on: https://skia-review.googlesource.com/3422
Reviewed-by: Mike Klein <mtklein@chromium.org>
This commit is contained in:
parent
1e76464f87
commit
511ea17b96
@ -16,6 +16,11 @@
|
||||
#include <math.h>
|
||||
#include <type_traits>
|
||||
|
||||
// These _abi types are data-only, and so can be used to store SkNx in structs or
|
||||
// pass them as function parameters or return values, even across compilation units.
|
||||
// Generic N-lane data-only carrier: stored as two half-width carriers, `lo` then `hi`.
// The halving ends at the SkNx_abi<1,T> specialization, so N must be a power of two;
// because N/2 truncates, any other N would silently produce fewer than N lanes.
template <int N, typename T>
struct SkNx_abi {
    static_assert(N >= 2 && (N & (N-1)) == 0, "SkNx_abi requires a power-of-two lane count");
    SkNx_abi<N/2,T> lo, hi;
};
|
||||
// Single-lane case: the carrier holds exactly one plain value of type T.
template <typename T>
struct SkNx_abi<1, T> {
    T val;
};
|
||||
|
||||
namespace {
|
||||
|
||||
#define SI static inline
|
||||
@ -42,6 +47,9 @@ struct SkNx {
|
||||
static_assert(N==16, "");
|
||||
}
|
||||
|
||||
// Rebuild the vector from its data-only form: a.lo initializes fLo, a.hi initializes fHi.
SkNx(const SkNx_abi<N,T>& a)
    : fLo(a.lo)
    , fHi(a.hi) {}
|
||||
// Convert to the data-only form by converting each half on its own (fLo -> lo, fHi -> hi).
operator SkNx_abi<N,T>() const {
    using HalfAbi = SkNx_abi<N/2,T>;
    return { static_cast<HalfAbi>(fLo), static_cast<HalfAbi>(fHi) };
}
|
||||
|
||||
T operator[](int k) const {
|
||||
SkASSERT(0 <= k && k < N);
|
||||
return k < N/2 ? fLo[k] : fHi[k-N/2];
|
||||
@ -129,6 +137,9 @@ struct SkNx<1,T> {
|
||||
SkNx() = default;
|
||||
SkNx(T v) : fVal(v) {}
|
||||
|
||||
// Rebuild the single lane from its data-only form by copying a.val into fVal.
SkNx(const SkNx_abi<1,T>& a)
    : fVal(a.val) {}
|
||||
// Convert to the data-only form, whose only member is initialized from fVal.
operator SkNx_abi<1,T>() const {
    return SkNx_abi<1,T>{ fVal };
}
|
||||
|
||||
// Android complains against unused parameters, so we guard it
|
||||
T operator[](int SkDEBUGCODE(k)) const {
|
||||
SkASSERT(k == 0);
|
||||
|
@ -57,9 +57,9 @@ class SkRasterPipeline {
|
||||
public:
|
||||
struct Stage;
|
||||
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
|
||||
using V = Sk8f;
|
||||
using V = SkNx_abi<8,float>;
|
||||
#else
|
||||
using V = Sk4f;
|
||||
using V = SkNx_abi<4,float>;
|
||||
#endif
|
||||
using Fn = void(SK_VECTORCALL *)(Stage*, size_t, size_t, V,V,V,V,
|
||||
V,V,V,V);
|
||||
|
@ -12,6 +12,8 @@
|
||||
|
||||
#define SKNX_IS_FAST
|
||||
|
||||
// 4-float carrier for this backend: a single float32x4_t rather than the generic lo/hi pair.
template <>
struct SkNx_abi<4, float> {
    float32x4_t vec;
};
|
||||
|
||||
namespace {
|
||||
|
||||
// ARMv8 has vrndmq_f32 to floor 4 floats. Here we emulate it:
|
||||
@ -111,6 +113,9 @@ public:
|
||||
SkNx(float val) : fVec(vdupq_n_f32(val)) {}
|
||||
SkNx(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d }; }
|
||||
|
||||
// Rebuild the vector from its data-only form by copying a.vec into fVec.
SkNx(const SkNx_abi<4,float>& a)
    : fVec(a.vec) {}
|
||||
// Convert to the data-only form, whose only member is initialized from fVec.
operator SkNx_abi<4,float>() const {
    return SkNx_abi<4,float>{ fVec };
}
|
||||
|
||||
static SkNx Load(const void* ptr) { return vld1q_f32((const float*)ptr); }
|
||||
void store(void* ptr) const { vst1q_f32((float*)ptr, fVec); }
|
||||
|
||||
|
@ -15,6 +15,8 @@
|
||||
|
||||
#define SKNX_IS_FAST
|
||||
|
||||
// 4-float carrier for this backend: a single __m128 rather than the generic lo/hi pair.
template <>
struct SkNx_abi<4, float> {
    __m128 vec;
};
|
||||
|
||||
namespace {
|
||||
|
||||
template <>
|
||||
@ -71,6 +73,9 @@ public:
|
||||
SkNx(float val) : fVec( _mm_set1_ps(val) ) {}
|
||||
SkNx(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {}
|
||||
|
||||
// Rebuild the vector from its data-only form by copying a.vec into fVec.
SkNx(const SkNx_abi<4,float>& a)
    : fVec(a.vec) {}
|
||||
// Convert to the data-only form, whose only member is initialized from fVec.
operator SkNx_abi<4,float>() const {
    return SkNx_abi<4,float>{ fVec };
}
|
||||
|
||||
static SkNx Load(const void* ptr) { return _mm_loadu_ps((const float*)ptr); }
|
||||
void store(void* ptr) const { _mm_storeu_ps((float*)ptr, fVec); }
|
||||
|
||||
|
@ -13,35 +13,38 @@
|
||||
#include "SkRasterPipeline.h"
|
||||
#include "SkSRGB.h"
|
||||
|
||||
using SkNf = SkRasterPipeline::V;
|
||||
static constexpr auto N = sizeof(SkNf) / sizeof(float);
|
||||
using SkNf_abi = SkRasterPipeline::V;
|
||||
static constexpr auto N = sizeof(SkNf_abi) / sizeof(float);
|
||||
using SkNf = SkNx<N, float>;
|
||||
using SkNi = SkNx<N, int>;
|
||||
using SkNh = SkNx<N, uint16_t>;
|
||||
|
||||
#define SI static inline
|
||||
|
||||
#define STAGE(name, kCallNext) \
|
||||
static SK_ALWAYS_INLINE void name##_kernel(void* ctx, size_t x, size_t tail, \
|
||||
SkNf& r, SkNf& g, SkNf& b, SkNf& a, \
|
||||
SkNf& dr, SkNf& dg, SkNf& db, SkNf& da); \
|
||||
SI void SK_VECTORCALL name(SkRasterPipeline::Stage* st, size_t x, size_t tail, \
|
||||
SkNf r, SkNf g, SkNf b, SkNf a, \
|
||||
SkNf dr, SkNf dg, SkNf db, SkNf da) { \
|
||||
name##_kernel(st->ctx<void*>(), x,0, r,g,b,a, dr,dg,db,da); \
|
||||
if (kCallNext) { \
|
||||
st->next(x,tail, r,g,b,a, dr,dg,db,da); \
|
||||
} \
|
||||
} \
|
||||
SI void SK_VECTORCALL name##_tail(SkRasterPipeline::Stage* st, size_t x, size_t tail, \
|
||||
SkNf r, SkNf g, SkNf b, SkNf a, \
|
||||
SkNf dr, SkNf dg, SkNf db, SkNf da) { \
|
||||
name##_kernel(st->ctx<void*>(), x,tail, r,g,b,a, dr,dg,db,da); \
|
||||
if (kCallNext) { \
|
||||
st->next(x,tail, r,g,b,a, dr,dg,db,da); \
|
||||
} \
|
||||
} \
|
||||
static SK_ALWAYS_INLINE void name##_kernel(void* ctx, size_t x, size_t tail, \
|
||||
SkNf& r, SkNf& g, SkNf& b, SkNf& a, \
|
||||
#define STAGE(name, kCallNext) \
|
||||
static SK_ALWAYS_INLINE void name##_kernel(void* ctx, size_t x, size_t tail, \
|
||||
SkNf& r, SkNf& g, SkNf& b, SkNf& a, \
|
||||
SkNf& dr, SkNf& dg, SkNf& db, SkNf& da); \
|
||||
SI void SK_VECTORCALL name(SkRasterPipeline::Stage* st, size_t x, size_t tail, \
|
||||
SkNf_abi R, SkNf_abi G, SkNf_abi B, SkNf_abi A, \
|
||||
SkNf_abi DR, SkNf_abi DG, SkNf_abi DB, SkNf_abi DA) { \
|
||||
SkNf r=R,g=G,b=B,a=A, dr=DR,dg=DG,db=DB,da=DA; \
|
||||
name##_kernel(st->ctx<void*>(), x,0, r,g,b,a, dr,dg,db,da); \
|
||||
if (kCallNext) { \
|
||||
st->next(x,tail, r,g,b,a, dr,dg,db,da); \
|
||||
} \
|
||||
} \
|
||||
SI void SK_VECTORCALL name##_tail(SkRasterPipeline::Stage* st, size_t x, size_t tail, \
|
||||
SkNf_abi R, SkNf_abi G, SkNf_abi B, SkNf_abi A, \
|
||||
SkNf_abi DR, SkNf_abi DG, SkNf_abi DB, SkNf_abi DA) { \
|
||||
SkNf r=R,g=G,b=B,a=A, dr=DR,dg=DG,db=DB,da=DA; \
|
||||
name##_kernel(st->ctx<void*>(), x,tail, r,g,b,a, dr,dg,db,da); \
|
||||
if (kCallNext) { \
|
||||
st->next(x,tail, r,g,b,a, dr,dg,db,da); \
|
||||
} \
|
||||
} \
|
||||
static SK_ALWAYS_INLINE void name##_kernel(void* ctx, size_t x, size_t tail, \
|
||||
SkNf& r, SkNf& g, SkNf& b, SkNf& a, \
|
||||
SkNf& dr, SkNf& dg, SkNf& db, SkNf& da)
|
||||
|
||||
|
||||
@ -50,8 +53,9 @@ using SkNh = SkNx<N, uint16_t>;
|
||||
static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \
|
||||
const SkNf& d, const SkNf& da); \
|
||||
SI void SK_VECTORCALL name(SkRasterPipeline::Stage* st, size_t x, size_t tail, \
|
||||
SkNf r, SkNf g, SkNf b, SkNf a, \
|
||||
SkNf dr, SkNf dg, SkNf db, SkNf da) { \
|
||||
SkNf_abi R, SkNf_abi G, SkNf_abi B, SkNf_abi A, \
|
||||
SkNf_abi DR, SkNf_abi DG, SkNf_abi DB, SkNf_abi DA) { \
|
||||
SkNf r=R,g=G,b=B,a=A, dr=DR,dg=DG,db=DB,da=DA; \
|
||||
r = name##_kernel(r,a,dr,da); \
|
||||
g = name##_kernel(g,a,dg,da); \
|
||||
b = name##_kernel(b,a,db,da); \
|
||||
@ -66,8 +70,9 @@ using SkNh = SkNx<N, uint16_t>;
|
||||
static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \
|
||||
const SkNf& d, const SkNf& da); \
|
||||
SI void SK_VECTORCALL name(SkRasterPipeline::Stage* st, size_t x, size_t tail, \
|
||||
SkNf r, SkNf g, SkNf b, SkNf a, \
|
||||
SkNf dr, SkNf dg, SkNf db, SkNf da) { \
|
||||
SkNf_abi R, SkNf_abi G, SkNf_abi B, SkNf_abi A, \
|
||||
SkNf_abi DR, SkNf_abi DG, SkNf_abi DB, SkNf_abi DA) { \
|
||||
SkNf r=R,g=G,b=B,a=A, dr=DR,dg=DG,db=DB,da=DA; \
|
||||
r = name##_kernel(r,a,dr,da); \
|
||||
g = name##_kernel(g,a,dg,da); \
|
||||
b = name##_kernel(b,a,db,da); \
|
||||
@ -85,7 +90,7 @@ namespace SK_OPTS_NS {
|
||||
void (*vTailStart)(), SkRasterPipeline::Stage* tail) {
|
||||
auto bodyStart = (SkRasterPipeline::Fn)vBodyStart,
|
||||
tailStart = (SkRasterPipeline::Fn)vTailStart;
|
||||
SkNf v; // Fastest to start uninitialized.
|
||||
SkNf v{0}; // TODO: uninitialized would be a bit faster, but some compilers are whiny.
|
||||
while (n >= N) {
|
||||
bodyStart(body, x,0, v,v,v,v, v,v,v,v);
|
||||
x += N;
|
||||
|
Loading…
Reference in New Issue
Block a user