Revert "Add convenient "xyzw" accessors and swizzles to skvx"

This reverts commit 01b02956c7.

Reason for revert: Codegen regressions

Original change's description:
> Add convenient "xyzw" accessors and swizzles to skvx
>
> Change-Id: Ic300285d10679a4e34190ab7b6b08bd1f6d80330
> Reviewed-on: https://skia-review.googlesource.com/c/skia/+/454309
> Reviewed-by: Michael Ludwig <michaelludwig@google.com>
> Commit-Queue: Chris Dalton <csmartdalton@google.com>

Bug: skia:12515
Change-Id: Id853e4d9e25c6d2ae622668ef064e1b2b078b824
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/457476
Auto-Submit: Chris Dalton <csmartdalton@google.com>
Reviewed-by: Chris Dalton <csmartdalton@google.com>
Reviewed-by: Michael Ludwig <michaelludwig@google.com>
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
This commit is contained in:
Chris Dalton 2021-10-08 14:31:37 -06:00 committed by SkCQ
parent 6030e0a2c5
commit c63e913f57
3 changed files with 59 additions and 213 deletions

View File

@ -54,131 +54,67 @@ namespace skvx {
// All Vec have the same simple memory layout, the same as `T vec[N]`.
// Forward declaration of the N-lane vector type; it is aligned to its full size, N*sizeof(T).
template <int N, typename T>
struct alignas(N*sizeof(T)) Vec;
// Forward declaration of shuffle(): given a Vec<N,T> and a list of lane indices Ix..., it returns
// a Vec with sizeof...(Ix) lanes. Declared here, ahead of its definition, so code appearing
// earlier in the file can name it.
template <int... Ix, int N, typename T>
SI Vec<sizeof...(Ix),T> shuffle(const Vec<N,T>&);
template <int N, typename T>
struct alignas(N*sizeof(T)) VecCommon {
struct alignas(N*sizeof(T)) Vec {
static_assert((N & (N-1)) == 0, "N must be a power of 2.");
static_assert(sizeof(T) >= alignof(T), "Very unexpected type traits: sizeof(T) < alignof(T)");
// Methods on vectors belong in the actual class only if:
// - they must be here, like constructors.
// - they'll definitely never want a specialized implementation.
// Other operations on vectors should be defined outside the type.
SKVX_ALWAYS_INLINE VecCommon() = default;
SKVX_ALWAYS_INLINE VecCommon(std::initializer_list<T> l) {
static_assert(sizeof(Vec<N,T>) == N*sizeof(T), "Vec must be tightly packed.");
static_assert(alignof(Vec<N,T>) == N*sizeof(T), "Vec alignment must support fast loads.");
if (l.size() >= N) {
memcpy(this, l.begin(), N*sizeof(T));
} else {
memcpy(this, l.begin(), l.size()*sizeof(T));
memset((char*)this + l.size()*sizeof(T), 0, (N - l.size())*sizeof(T));
}
}
SKVX_ALWAYS_INLINE T operator[](int i) const {
auto vec = static_cast<const Vec<N,T>*>(this);
if constexpr (N > 1) {
return i < N/2 ? vec->lo[i] : vec->hi[i-N/2];
} else {
return vec->val;
}
}
SKVX_ALWAYS_INLINE T& operator[](int i) {
auto vec = static_cast<Vec<N,T>*>(this);
if constexpr (N > 1) {
return i < N/2 ? vec->lo[i] : vec->hi[i-N/2];
} else {
return vec->val;
}
}
SKVX_ALWAYS_INLINE void store(void* ptr) const {
memcpy(ptr, this, N*sizeof(T));
}
SKVX_ALWAYS_INLINE static Vec<N,T> Load(const void* ptr) {
Vec<N,T> v;
memcpy(&v, ptr, N*sizeof(T));
return v;
}
};
template <int N, typename T>
struct Vec : public VecCommon<N,T> {
SKVX_ALWAYS_INLINE Vec() = default;
SKVX_ALWAYS_INLINE Vec(T s) : lo(s), hi(s) {}
SKVX_ALWAYS_INLINE Vec(std::initializer_list<T> l) : VecCommon<N,T>(l) {}
static_assert(sizeof(T) >= alignof(T), "What kind of crazy T is this?");
Vec<N/2,T> lo, hi;
// Methods belong here in the class declaration of Vec only if:
// - they must be here, like constructors or operator[];
// - they'll definitely never want a specialized implementation.
// Other operations on Vec should be defined outside the type.
SKVX_ALWAYS_INLINE Vec() = default;
template <typename U, typename=std::enable_if_t<std::is_convertible<U,T>::value>>
SKVX_ALWAYS_INLINE
Vec(U x) : lo(x), hi(x) {}
SKVX_ALWAYS_INLINE Vec(std::initializer_list<T> xs) {
T vals[N] = {0};
memcpy(vals, xs.begin(), std::min(xs.size(), (size_t)N)*sizeof(T));
lo = Vec<N/2,T>::Load(vals + 0);
hi = Vec<N/2,T>::Load(vals + N/2);
}
SKVX_ALWAYS_INLINE T operator[](int i) const { return i < N/2 ? lo[i] : hi[i-N/2]; }
SKVX_ALWAYS_INLINE T& operator[](int i) { return i < N/2 ? lo[i] : hi[i-N/2]; }
SKVX_ALWAYS_INLINE static Vec Load(const void* ptr) {
Vec v;
memcpy(&v, ptr, sizeof(Vec));
return v;
}
SKVX_ALWAYS_INLINE void store(void* ptr) const {
memcpy(ptr, this, sizeof(Vec));
}
};
// Specialization of Vec for exactly four lanes. The lanes are stored as two Vec<2,T> halves
// (`lo` holds the first two, `hi` the last two) and are exposed through x/y/z/w style accessors
// in addition to whatever VecCommon<4,T> provides.
template <typename T>
struct Vec<4,T> : public VecCommon<4,T> {
SKVX_ALWAYS_INLINE Vec() = default;
// Initializes both halves from the same scalar s.
SKVX_ALWAYS_INLINE Vec(T s) : lo(s), hi(s) {}
// Delegates list initialization to VecCommon<4,T>.
SKVX_ALWAYS_INLINE Vec(std::initializer_list<T> l) : VecCommon<4,T>(l) {}
// Component-wise constructors: the first two components (or the first Vec<2,T>) go to `lo`,
// the last two components (or the second Vec<2,T>) go to `hi`.
SKVX_ALWAYS_INLINE Vec(T x, T y, T z, T w) : lo(x,y), hi(z, w) {}
SKVX_ALWAYS_INLINE Vec(Vec<2,T> xy, T z, T w) : lo(xy), hi(z,w) {}
SKVX_ALWAYS_INLINE Vec(T x, T y, Vec<2,T> zw) : lo(x,y), hi(zw) {}
SKVX_ALWAYS_INLINE Vec(Vec<2,T> xy, Vec<2,T> zw) : lo(xy), hi(zw) {}
// Mutable accessors: these return references, so assigning through them writes into this Vec.
// xy()/zw() alias the `lo`/`hi` halves; x()/y()/z()/w() alias the individual `val` members.
SKVX_ALWAYS_INLINE Vec<2,T>& xy() { return lo; }
SKVX_ALWAYS_INLINE Vec<2,T>& zw() { return hi; }
SKVX_ALWAYS_INLINE T& x() { return lo.lo.val; }
SKVX_ALWAYS_INLINE T& y() { return lo.hi.val; }
SKVX_ALWAYS_INLINE T& z() { return hi.lo.val; }
SKVX_ALWAYS_INLINE T& w() { return hi.hi.val; }
// Const accessors: same lanes as above, returned by value.
SKVX_ALWAYS_INLINE Vec<2,T> xy() const { return lo; }
SKVX_ALWAYS_INLINE Vec<2,T> zw() const { return hi; }
SKVX_ALWAYS_INLINE T x() const { return lo.lo.val; }
SKVX_ALWAYS_INLINE T y() const { return lo.hi.val; }
SKVX_ALWAYS_INLINE T z() const { return hi.lo.val; }
SKVX_ALWAYS_INLINE T w() const { return hi.hi.val; }
// Exchange-based swizzles. These should take 1 cycle on NEON and 3 (pipelined) cycles on SSE.
// yxwz() swaps the two lanes within each half; zwxy() swaps the two halves.
SKVX_ALWAYS_INLINE Vec<4,T> yxwz() const { return shuffle<1,0,3,2>(*this); }
SKVX_ALWAYS_INLINE Vec<4,T> zwxy() const { return shuffle<2,3,0,1>(*this); }
// Storage: lanes 0-1 in `lo`, lanes 2-3 in `hi`.
Vec<2,T> lo, hi;
};
// Specialization of Vec for exactly two lanes. The lanes are stored as two Vec<1,T> members
// (`lo` is the first lane, `hi` the second) and are exposed through x()/y() accessors.
template <typename T>
struct Vec<2,T> : public VecCommon<2,T> {
SKVX_ALWAYS_INLINE Vec() = default;
// Initializes both lanes from the same scalar s.
SKVX_ALWAYS_INLINE Vec(T s) : lo(s), hi(s) {}
// Delegates list initialization to VecCommon<2,T>.
SKVX_ALWAYS_INLINE Vec(std::initializer_list<T> l) : VecCommon<2,T>(l) {}
// Component-wise constructor: x goes to `lo`, y goes to `hi`.
SKVX_ALWAYS_INLINE Vec(T x, T y) : lo(x), hi(y) {}
// Mutable accessors return references to the underlying `val` members, so they are assignable.
SKVX_ALWAYS_INLINE T& x() { return lo.val; }
SKVX_ALWAYS_INLINE T& y() { return hi.val; }
// Const accessors return the lane values by value.
SKVX_ALWAYS_INLINE T x() const { return lo.val; }
SKVX_ALWAYS_INLINE T y() const { return hi.val; }
// This exchange-based swizzle should take 1 cycle on NEON and 3 (pipelined) cycles on SSE.
SKVX_ALWAYS_INLINE Vec<2,T> yx() const { return shuffle<1,0>(*this); }
// Builds a Vec<4,T> whose first and second halves are both copies of this vector.
SKVX_ALWAYS_INLINE Vec<4,T> xyxy() const { return Vec<4,T>(*this, *this); }
// Storage: lane 0 in `lo`, lane 1 in `hi`.
Vec<1,T> lo, hi;
};
template <typename T>
struct Vec<1,T> : public VecCommon<1,T> {
SKVX_ALWAYS_INLINE Vec() = default;
SKVX_ALWAYS_INLINE Vec(T s) : val(s) {}
SKVX_ALWAYS_INLINE Vec(std::initializer_list<T> l) : VecCommon<1,T>(l) {}
struct Vec<1,T> {
T val;
SKVX_ALWAYS_INLINE Vec() = default;
template <typename U, typename=std::enable_if_t<std::is_convertible<U,T>::value>>
SKVX_ALWAYS_INLINE
Vec(U x) : val(x) {}
SKVX_ALWAYS_INLINE Vec(std::initializer_list<T> xs) : val(xs.size() ? *xs.begin() : 0) {}
SKVX_ALWAYS_INLINE T operator[](int) const { return val; }
SKVX_ALWAYS_INLINE T& operator[](int) { return val; }
SKVX_ALWAYS_INLINE static Vec Load(const void* ptr) {
Vec v;
memcpy(&v, ptr, sizeof(Vec));
return v;
}
SKVX_ALWAYS_INLINE void store(void* ptr) const {
memcpy(ptr, this, sizeof(Vec));
}
};
// Ideally we'd only use bit_pun(), but until this file is always built as C++17 with constexpr if,

View File

@ -28,15 +28,15 @@ public:
GrPathXform& operator=(const SkMatrix& m) {
SkASSERT(!m.hasPerspective());
// Duplicate the matrix in float4.xy and float4.zw so we can map two points at once.
fScale = float2(m.getScaleX(), m.getScaleY()).xyxy();
fSkew = float2(m.getSkewX(), m.getSkewY()).xyxy();
fTrans = float2(m.getTranslateX(), m.getTranslateY()).xyxy();
// Duplicate the matrix in float4.lo and float4.hi so we can map two points at once.
fScale = {m.getScaleX(), m.getScaleY(), m.getScaleX(), m.getScaleY()};
fSkew = {m.getSkewX(), m.getSkewY(), m.getSkewX(), m.getSkewY()};
fTrans = {m.getTranslateX(), m.getTranslateY(), m.getTranslateX(), m.getTranslateY()};
return *this;
}
SK_ALWAYS_INLINE float2 mapPoint(float2 p) const {
return fScale.xy() * p + (fSkew.xy() * skvx::shuffle<1,0>(p) + fTrans.xy());
return fScale.lo * p + (fSkew.lo * skvx::shuffle<1,0>(p) + fTrans.lo);
}
SK_ALWAYS_INLINE SkPoint mapPoint(SkPoint p) const {

View File

@ -5,9 +5,6 @@
* found in the LICENSE file.
*/
// Uncomment this line to test the scalar implementation.
// #define SKNX_NO_SIMD
#include "include/private/SkVx.h"
#include "tests/Test.h"
#include <numeric>
@ -173,93 +170,6 @@ DEF_TEST(SkVx, r) {
}
}
// Exercises the two-lane accessors and swizzles on float2: x()/y() reads and writes,
// operator[] writes, yx(), store(), bit_pun to SkPoint, xyxy(), and the float4 constructors
// that accept float2 arguments. Statements are order-dependent: each assertion checks the
// state left by the mutations before it.
DEF_TEST(SkVx_xy, r) {
// Parenthesized (x, y) construction must agree with brace construction.
float2 f = float2(1,2);
REPORTER_ASSERT(r, all(f == float2{1,2}));
REPORTER_ASSERT(r, f.x() == 1);
REPORTER_ASSERT(r, f.y() == 2);
// Writes through the reference-returning accessors must be visible in the vector.
f.y() = 9;
REPORTER_ASSERT(r, all(f == float2{1,9}));
f.x() = 0;
REPORTER_ASSERT(r, all(f == float2(0,9)));
// Writes through operator[] must be visible through x()/y().
f[0] = 8;
REPORTER_ASSERT(r, f.x() == 8);
f[1] = 6;
REPORTER_ASSERT(r, f.y() == 6);
REPORTER_ASSERT(r, all(f == float2(8,6)));
// yx() swaps the two lanes.
f = f.yx();
REPORTER_ASSERT(r, all(f == float2(6,8)));
// The vector must round-trip to SkPoint via bit_pun and via store(), including from a
// temporary returned by yx().
REPORTER_ASSERT(r, skvx::bit_pun<SkPoint>(f) == SkPoint::Make(6,8));
SkPoint p;
f.store(&p);
REPORTER_ASSERT(r, p == SkPoint::Make(6,8));
f.yx().store(&p);
REPORTER_ASSERT(r, p == SkPoint::Make(8,6));
// xyxy(), the float4 constructors taking float2 halves, and skvx::join must all agree.
REPORTER_ASSERT(r, all(f.xyxy() == float4(6,8,6,8)));
REPORTER_ASSERT(r, all(f.xyxy() == float4(f,f)));
REPORTER_ASSERT(r, all(skvx::join(f,f) == f.xyxy()));
REPORTER_ASSERT(r, all(skvx::join(f.yx(),f) == float4(f.y(),f.x(),f)));
REPORTER_ASSERT(r, all(skvx::join(f.yx(),f) == float4(f.yx(),f.x(),f.y())));
REPORTER_ASSERT(r, all(skvx::join(f,f.yx()) == float4(f.x(),f.y(),f.yx())));
REPORTER_ASSERT(r, all(skvx::join(f.yx(),f.yx()) == float4(f.yx(),f.yx())));
}
// Exercises the four-lane accessors and swizzles on float4: the mixed scalar/float2
// constructors, xy()/zw() and x()/y()/z()/w() reads and writes, operator[] writes, the
// yxwz()/zwxy() swizzles, and use of swizzled values as arguments to
// naive_if_then_else / if_then_else / pin. Statements are order-dependent: each assertion
// checks the state left by the mutations before it.
DEF_TEST(SkVx_xyzw, r) {
// All four constructor forms must produce the same lanes as brace construction.
float4 f = float4{1,2,3,4};
REPORTER_ASSERT(r, all(f == float4(1,2,3,4)));
REPORTER_ASSERT(r, all(f == float4(1,2,float2(3,4))));
REPORTER_ASSERT(r, all(f == float4(float2(1,2),3,4)));
REPORTER_ASSERT(r, all(f == float4(float2(1,2),float2(3,4))));
// Assigning through xy() replaces the first two lanes only.
f.xy() = float2(9,8);
REPORTER_ASSERT(r, all(f == float4(9,8,3,4)));
// Chained accessors: zw().x() / zw().y() write the third and fourth lanes.
f.zw().x() = 7;
f.zw().y() = 6;
REPORTER_ASSERT(r, all(f == float4(9,8,7,6)));
// Per-lane reference accessors.
f.x() = 5;
f.y() = 4;
f.z() = 3;
f.w() = 2;
REPORTER_ASSERT(r, all(f == float4(5,4,3,2)));
// Writes through operator[] must be visible through the named accessors.
f[0] = 0;
REPORTER_ASSERT(r, f.x() == 0);
f[1] = 1;
REPORTER_ASSERT(r, f.y() == 1);
f[2] = 2;
REPORTER_ASSERT(r, f.z() == 2);
f[3] = 3;
REPORTER_ASSERT(r, f.w() == 3);
REPORTER_ASSERT(r, skvx::all(f.xy() == float2(0,1)));
REPORTER_ASSERT(r, skvx::all(f.zw() == float2{2,3}));
REPORTER_ASSERT(r, all(f == float4(0,1,2,3)));
// yxwz() must match the equivalent two-lane shuffles on each half; zwxy() must exchange
// the halves. Checked both through lo/hi sub-vectors and through the raw `val` members.
REPORTER_ASSERT(r, all(f.yxwz().lo == skvx::shuffle<1,0>(f)));
REPORTER_ASSERT(r, all(f.yxwz().hi == skvx::shuffle<3,2>(f)));
REPORTER_ASSERT(r, all(f.zwxy().lo.lo == f.z()));
REPORTER_ASSERT(r, all(f.zwxy().lo.hi == f.w()));
REPORTER_ASSERT(r, all(f.zwxy().hi.lo == f.x()));
REPORTER_ASSERT(r, all(f.zwxy().hi.hi == f.y()));
REPORTER_ASSERT(r, f.yxwz().lo.lo.val == f.y());
REPORTER_ASSERT(r, f.yxwz().lo.hi.val == f.x());
REPORTER_ASSERT(r, f.yxwz().hi.lo.val == f.w());
REPORTER_ASSERT(r, f.yxwz().hi.hi.val == f.z());
// Accessor and swizzle results used directly as operands of the select and pin helpers;
// the naive and non-naive select variants are expected to give the same answer.
REPORTER_ASSERT(r, all(skvx::naive_if_then_else(int2(0,~0),
skvx::shuffle<3,2>(float4(0,1,2,3)),
float4(4,5,6,7).xy()) == float2(4,2)));
REPORTER_ASSERT(r, all(skvx::if_then_else(int2(0,~0),
skvx::shuffle<3,2>(float4(0,1,2,3)),
float4(4,5,6,7).xy()) == float2(4,2)));
REPORTER_ASSERT(r, all(skvx::naive_if_then_else(int2(0,~0).xyxy(),
float4(0,1,2,3).zwxy(),
float4(4,5,6,7)) == float4(4,3,6,1)));
REPORTER_ASSERT(r, all(skvx::if_then_else(int2(0,~0).xyxy(),
float4(0,1,2,3).zwxy(),
float4(4,5,6,7)) == float4(4,3,6,1)));
REPORTER_ASSERT(r, all(skvx::pin(float4(0,1,2,3).yxwz(),
float2(1).xyxy(),
float2(2).xyxy()) == float4(1,1,2,2)));
}
static bool check_approx_acos(skiatest::Reporter* r, float x, float approx_acos_x) {
float acosf_x = acosf(x);
float error = acosf_x - approx_acos_x;