prune unused SkNx features

- remove float -> int conversion, keeping float -> byte (see the sketch below)
- remove support for doubles
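
For context, here is roughly what the surviving float <-> byte path looks like after the prune. This is only an illustrative sketch: scale_color() is a made-up helper, and it assumes you are building inside the Skia tree, where SkNx.h provides Sk4f after this CL.

    #include "SkNx.h"

    // Hypothetical helper: scale an RGBA pixel by s using the kept byte <-> float
    // conversions (FromBytes/toBytes). The pruned castTrunc() and the double-based
    // SkNf variants are not needed for this kind of pixel work.
    static void scale_color(const uint8_t rgba[4], float s, uint8_t out[4]) {
        Sk4f c = Sk4f::FromBytes(rgba);   // [0,255] bytes -> [0.0, 255.0] floats
        (c * Sk4f(s)).toBytes(out);       // back to [0,255] bytes
    }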

I was thinking of specializing Sk8f for AVX. Pruning these features first will help keep that complexity down.

This may cause minor diffs in radial gradients: toBytes() rounds where castTrunc() truncated.  But I don't see any diffs in Gold.
https://gold.skia.org/search2?issue=1411563008&unt=true&query=source_type%3Dgm&master=false
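
A quick worked illustration of that rounding difference (the 101.7 value is made up; the cache indexing just mirrors the shadeSpan_radial_clamp2 loop touched below):

    #include <cmath>
    #include <cstdio>

    int main() {
        float dist = 101.7f;
        int truncated = (int)dist;               // what castTrunc() produced: 101
        int rounded   = (int)std::lrintf(dist);  // what the rounding toBytes() path lands on: 102
        // The gradient loop reads cache[toggle + fi[i]], so the two schemes can at
        // worst land on adjacent cache entries -- hence no visible diffs in Gold.
        std::printf("trunc=%d round=%d\n", truncated, rounded);
        return 0;
    }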

BUG=skia:4117
CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot

Review URL: https://codereview.chromium.org/1411563008
mtklein 2015-11-09 08:33:53 -08:00 committed by Commit bot
parent a4c26c0d3a
commit 6f797092d2
5 changed files with 66 additions and 217 deletions

@@ -81,29 +81,28 @@ protected:
SkNi<N/2, T> fLo, fHi;
};
template <int N, typename T>
template <int N>
class SkNf {
static int32_t MyNi(float);
static int64_t MyNi(double);
typedef decltype(MyNi(T())) I;
public:
SkNf() {}
explicit SkNf(T val) : fLo(val), fHi(val) {}
static SkNf Load(const T vals[N]) {
return SkNf(SkNf<N/2,T>::Load(vals), SkNf<N/2,T>::Load(vals+N/2));
explicit SkNf(float val) : fLo(val), fHi(val) {}
static SkNf Load(const float vals[N]) {
return SkNf(SkNf<N/2>::Load(vals), SkNf<N/2>::Load(vals+N/2));
}
// FromBytes() and toBytes() specializations may assume their argument is N-byte aligned.
// E.g. Sk4f::FromBytes() may assume it's reading from a 4-byte-aligned pointer.
// Converts [0,255] bytes to [0.0, 255.0] floats.
static SkNf FromBytes(const uint8_t bytes[N]) {
return SkNf(SkNf<N/2,T>::FromBytes(bytes), SkNf<N/2,T>::FromBytes(bytes+N/2));
return SkNf(SkNf<N/2>::FromBytes(bytes), SkNf<N/2>::FromBytes(bytes+N/2));
}
SkNf(T a, T b) : fLo(a), fHi(b) { REQUIRE(N==2); }
SkNf(T a, T b, T c, T d) : fLo(a,b), fHi(c,d) { REQUIRE(N==4); }
SkNf(T a, T b, T c, T d, T e, T f, T g, T h) : fLo(a,b,c,d), fHi(e,f,g,h) { REQUIRE(N==8); }
SkNf(float a, float b) : fLo(a), fHi(b) { REQUIRE(N==2); }
SkNf(float a, float b, float c, float d) : fLo(a,b), fHi(c,d) { REQUIRE(N==4); }
SkNf(float a, float b, float c, float d, float e, float f, float g, float h)
: fLo(a,b,c,d)
, fHi(e,f,g,h) { REQUIRE(N==8); }
void store(T vals[N]) const {
void store(float vals[N]) const {
fLo.store(vals);
fHi.store(vals+N/2);
}
@@ -114,8 +113,6 @@ public:
fHi.toBytes(bytes+N/2);
}
SkNi<N,I> castTrunc() const { return SkNi<N,I>(fLo.castTrunc(), fHi.castTrunc()); }
SkNf operator + (const SkNf& o) const { return SkNf(fLo + o.fLo, fHi + o.fHi); }
SkNf operator - (const SkNf& o) const { return SkNf(fLo - o.fLo, fHi - o.fHi); }
SkNf operator * (const SkNf& o) const { return SkNf(fLo * o.fLo, fHi * o.fHi); }
@@ -129,10 +126,10 @@ public:
SkNf operator >= (const SkNf& o) const { return SkNf(fLo >= o.fLo, fHi >= o.fHi); }
static SkNf Min(const SkNf& l, const SkNf& r) {
return SkNf(SkNf<N/2,T>::Min(l.fLo, r.fLo), SkNf<N/2,T>::Min(l.fHi, r.fHi));
return SkNf(SkNf<N/2>::Min(l.fLo, r.fLo), SkNf<N/2>::Min(l.fHi, r.fHi));
}
static SkNf Max(const SkNf& l, const SkNf& r) {
return SkNf(SkNf<N/2,T>::Max(l.fLo, r.fLo), SkNf<N/2,T>::Max(l.fHi, r.fHi));
return SkNf(SkNf<N/2>::Max(l.fLo, r.fLo), SkNf<N/2>::Max(l.fHi, r.fHi));
}
SkNf sqrt() const { return SkNf(fLo. sqrt(), fHi. sqrt()); }
@@ -145,7 +142,7 @@ public:
SkNf invert() const { return SkNf(fLo. invert(), fHi. invert()); }
SkNf approxInvert() const { return SkNf(fLo.approxInvert(), fHi.approxInvert()); }
template <int k> T kth() const {
template <int k> float kth() const {
SkASSERT(0 <= k && k < N);
return k < N/2 ? fLo.template kth<k>() : fHi.template kth<k-N/2>();
}
@@ -158,9 +155,9 @@ public:
protected:
REQUIRE(0 == (N & (N-1)));
SkNf(const SkNf<N/2, T>& lo, const SkNf<N/2, T>& hi) : fLo(lo), fHi(hi) {}
SkNf(const SkNf<N/2>& lo, const SkNf<N/2>& hi) : fLo(lo), fHi(hi) {}
SkNf<N/2, T> fLo, fHi;
SkNf<N/2> fLo, fHi;
};
@@ -204,21 +201,16 @@ protected:
T fVal;
};
template <typename T>
class SkNf<1,T> {
static int32_t MyNi(float);
static int64_t MyNi(double);
typedef decltype(MyNi(T())) I;
template <>
class SkNf<1> {
public:
SkNf() {}
explicit SkNf(T val) : fVal(val) {}
static SkNf Load(const T vals[1]) { return SkNf(vals[0]); }
static SkNf FromBytes(const uint8_t bytes[1]) { return SkNf((T)bytes[0]); }
explicit SkNf(float val) : fVal(val) {}
static SkNf Load(const float vals[1]) { return SkNf(vals[0]); }
static SkNf FromBytes(const uint8_t bytes[1]) { return SkNf((float)bytes[0]); }
void store(T vals[1]) const { vals[0] = fVal; }
void toBytes(uint8_t bytes[1]) const { bytes[0] = (uint8_t)(SkTMin(fVal, (T)255.0)); }
SkNi<1,I> castTrunc() const { return SkNi<1,I>(fVal); }
void store(float vals[1]) const { vals[0] = fVal; }
void toBytes(uint8_t bytes[1]) const { bytes[0] = (uint8_t)(SkTMin(fVal, 255.0f)); }
SkNf operator + (const SkNf& o) const { return SkNf(fVal + o.fVal); }
SkNf operator - (const SkNf& o) const { return SkNf(fVal - o.fVal); }
@@ -235,35 +227,30 @@ public:
static SkNf Min(const SkNf& l, const SkNf& r) { return SkNf(SkTMin(l.fVal, r.fVal)); }
static SkNf Max(const SkNf& l, const SkNf& r) { return SkNf(SkTMax(l.fVal, r.fVal)); }
SkNf sqrt() const { return SkNf(Sqrt(fVal)); }
SkNf rsqrt0() const { return SkNf((T)1 / Sqrt(fVal)); }
SkNf sqrt() const { return SkNf(sqrtf(fVal)); }
SkNf rsqrt0() const { return SkNf(1.0f / sqrtf(fVal)); }
SkNf rsqrt1() const { return this->rsqrt0(); }
SkNf rsqrt2() const { return this->rsqrt1(); }
SkNf invert() const { return SkNf((T)1 / fVal); }
SkNf invert() const { return SkNf(1.0f / fVal); }
SkNf approxInvert() const { return this->invert(); }
template <int k> T kth() const {
template <int k> float kth() const {
SkASSERT(k == 0);
return fVal;
}
bool allTrue() const { return this->pun(); }
bool anyTrue() const { return this->pun(); }
bool allTrue() const { return this->pun() != 0; }
bool anyTrue() const { return this->pun() != 0; }
SkNf thenElse(const SkNf& t, const SkNf& e) const { return this->pun() ? t : e; }
protected:
// We do double sqrts natively, or via floats for any other type.
template <typename U>
static U Sqrt(U val) { return (U) ::sqrtf((float)val); }
static double Sqrt(double val) { return ::sqrt ( val); }
I pun() const {
union { T f; I i; } pun = { fVal };
uint32_t pun() const {
union { float f; uint32_t i; } pun = { fVal };
return pun.i;
}
T fVal;
float fVal;
};
// This default implementation can be specialized by ../opts/SkNx_foo.h
@@ -285,8 +272,6 @@ inline SkNx SkNx_dup(const SkNx& src) { return SkNx_shuffle<Ix>(src); }
} // namespace
// Include platform specific specializations if available.
#ifndef SKNX_NO_SIMD
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
@@ -298,21 +283,14 @@ inline SkNx SkNx_dup(const SkNx& src) { return SkNx_shuffle<Ix>(src); }
#undef REQUIRE
typedef SkNf<2, float> Sk2f;
typedef SkNf<2, double> Sk2d;
typedef SkNf<2, SkScalar> Sk2s;
typedef SkNf<2> Sk2f;
typedef SkNf<2> Sk2s;
typedef SkNf<4, float> Sk4f;
typedef SkNf<4, double> Sk4d;
typedef SkNf<4, SkScalar> Sk4s;
typedef SkNf<4> Sk4f;
typedef SkNf<4> Sk4s;
typedef SkNi<4, uint16_t> Sk4h;
typedef SkNi<8, uint16_t> Sk8h;
typedef SkNi<16, uint16_t> Sk16h;
typedef SkNi<16, uint8_t> Sk16b;
typedef SkNi<4, int32_t> Sk4i;
typedef SkNi<4, uint32_t> Sk4u;
typedef SkNi<16, uint8_t> Sk16b;
#endif//SkNx_DEFINED

@@ -306,8 +306,8 @@ void shadeSpan_radial_clamp2(SkScalar sfx, SkScalar sdx, SkScalar sfy, SkScalar
R = R + dR;
dR = dR + ddR;
int fi[4];
dist.castTrunc().store(fi);
uint8_t fi[4];
dist.toBytes(fi);
for (int i = 0; i < 4; i++) {
*dstC++ = cache[toggle + fi[i]];
@@ -318,8 +318,8 @@ void shadeSpan_radial_clamp2(SkScalar sfx, SkScalar sdx, SkScalar sfy, SkScalar
if (count) {
Sk4f dist = Sk4f::Min(fast_sqrt(R), max);
int fi[4];
dist.castTrunc().store(fi);
uint8_t fi[4];
dist.toBytes(fi);
for (int i = 0; i < count; i++) {
*dstC++ = cache[toggle + fi[i]];
toggle = next_dither_toggle(toggle);

@@ -33,7 +33,7 @@ namespace { // See SkNx.h
case 31: return op(v, 31); } return fVec
template <>
class SkNf<2, float> {
class SkNf<2> {
public:
SkNf(float32x2_t vec) : fVec(vec) {}
@@ -113,81 +113,6 @@ public:
float32x2_t fVec;
};
#if defined(SK_CPU_ARM64)
template <>
class SkNf<2, double> {
public:
SkNf(float64x2_t vec) : fVec(vec) {}
SkNf() {}
explicit SkNf(double val) : fVec(vdupq_n_f64(val)) {}
static SkNf Load(const double vals[2]) { return vld1q_f64(vals); }
SkNf(double a, double b) { fVec = (float64x2_t) { a, b }; }
void store(double vals[2]) const { vst1q_f64(vals, fVec); }
SkNf operator + (const SkNf& o) const { return vaddq_f64(fVec, o.fVec); }
SkNf operator - (const SkNf& o) const { return vsubq_f64(fVec, o.fVec); }
SkNf operator * (const SkNf& o) const { return vmulq_f64(fVec, o.fVec); }
SkNf operator / (const SkNf& o) const { return vdivq_f64(fVec, o.fVec); }
// vreinterpretq_f64_u64 and vreinterpretq_f64_u32 don't seem to exist.... weird.
SkNf operator==(const SkNf& o) const { return (float64x2_t)(vceqq_f64(fVec, o.fVec)); }
SkNf operator <(const SkNf& o) const { return (float64x2_t)(vcltq_f64(fVec, o.fVec)); }
SkNf operator >(const SkNf& o) const { return (float64x2_t)(vcgtq_f64(fVec, o.fVec)); }
SkNf operator<=(const SkNf& o) const { return (float64x2_t)(vcleq_f64(fVec, o.fVec)); }
SkNf operator>=(const SkNf& o) const { return (float64x2_t)(vcgeq_f64(fVec, o.fVec)); }
SkNf operator != (const SkNf& o) const {
return (float64x2_t)(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(fVec, o.fVec))));
}
static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f64(l.fVec, r.fVec); }
static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f64(l.fVec, r.fVec); }
SkNf sqrt() const { return vsqrtq_f64(fVec); }
SkNf rsqrt0() const { return vrsqrteq_f64(fVec); }
SkNf rsqrt1() const {
float64x2_t est0 = this->rsqrt0().fVec;
return vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est0, est0)), est0);
}
SkNf rsqrt2() const {
float64x2_t est1 = this->rsqrt1().fVec;
return vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est1, est1)), est1);
}
SkNf approxInvert() const {
float64x2_t est0 = vrecpeq_f64(fVec),
est1 = vmulq_f64(vrecpsq_f64(est0, fVec), est0);
return est1;
}
SkNf invert() const {
float64x2_t est1 = this->approxInvert().fVec,
est2 = vmulq_f64(vrecpsq_f64(est1, fVec), est1),
est3 = vmulq_f64(vrecpsq_f64(est2, fVec), est2);
return est3;
}
template <int k> double kth() const {
SkASSERT(0 <= k && k < 2);
return vgetq_lane_f64(fVec, k&1);
}
// vreinterpretq_u64_f64 doesn't seem to exist.... weird.
bool allTrue() const {
auto v = (uint64x2_t)(fVec);
return vgetq_lane_u64(v,0) && vgetq_lane_u64(v,1);
}
bool anyTrue() const {
auto v = (uint64x2_t)(fVec);
return vgetq_lane_u64(v,0) || vgetq_lane_u64(v,1);
}
float64x2_t fVec;
};
#endif//defined(SK_CPU_ARM64)
template <>
class SkNi<4, int> {
public:
@@ -216,7 +141,7 @@ public:
};
template <>
class SkNf<4, float> {
class SkNf<4> {
public:
SkNf(float32x4_t vec) : fVec(vec) {}
@@ -240,8 +165,6 @@ public:
vst1_lane_u32((uint32_t*)bytes, (uint32x2_t)fix8, 0);
}
SkNi<4, int> castTrunc() const { return vcvtq_s32_f32(fVec); }
SkNf approxInvert() const {
float32x4_t est0 = vrecpeq_f32(fVec),
est1 = vmulq_f32(vrecpsq_f32(est0, fVec), est0);

@@ -14,7 +14,7 @@ namespace { // See SkNx.h
template <>
class SkNf<2, float> {
class SkNf<2> {
public:
SkNf(const __m128& vec) : fVec(vec) {}
@@ -62,53 +62,6 @@ public:
__m128 fVec;
};
template <>
class SkNf<2, double> {
public:
SkNf(const __m128d& vec) : fVec(vec) {}
SkNf() {}
explicit SkNf(double val) : fVec( _mm_set1_pd(val) ) {}
static SkNf Load(const double vals[2]) { return _mm_loadu_pd(vals); }
SkNf(double a, double b) : fVec(_mm_setr_pd(a,b)) {}
void store(double vals[2]) const { _mm_storeu_pd(vals, fVec); }
SkNf operator + (const SkNf& o) const { return _mm_add_pd(fVec, o.fVec); }
SkNf operator - (const SkNf& o) const { return _mm_sub_pd(fVec, o.fVec); }
SkNf operator * (const SkNf& o) const { return _mm_mul_pd(fVec, o.fVec); }
SkNf operator / (const SkNf& o) const { return _mm_div_pd(fVec, o.fVec); }
SkNf operator == (const SkNf& o) const { return _mm_cmpeq_pd (fVec, o.fVec); }
SkNf operator != (const SkNf& o) const { return _mm_cmpneq_pd(fVec, o.fVec); }
SkNf operator < (const SkNf& o) const { return _mm_cmplt_pd (fVec, o.fVec); }
SkNf operator > (const SkNf& o) const { return _mm_cmpgt_pd (fVec, o.fVec); }
SkNf operator <= (const SkNf& o) const { return _mm_cmple_pd (fVec, o.fVec); }
SkNf operator >= (const SkNf& o) const { return _mm_cmpge_pd (fVec, o.fVec); }
static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_pd(l.fVec, r.fVec); }
static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_pd(l.fVec, r.fVec); }
SkNf sqrt() const { return _mm_sqrt_pd(fVec); }
SkNf rsqrt0() const { return _mm_cvtps_pd(_mm_rsqrt_ps(_mm_cvtpd_ps(fVec))); }
SkNf rsqrt1() const { return this->rsqrt0(); }
SkNf rsqrt2() const { return this->rsqrt1(); }
SkNf invert() const { return SkNf(1) / *this; }
SkNf approxInvert() const { return _mm_cvtps_pd(_mm_rcp_ps(_mm_cvtpd_ps(fVec))); }
template <int k> double kth() const {
SkASSERT(0 <= k && k < 2);
union { __m128d v; double ds[2]; } pun = {fVec};
return pun.ds[k&1];
}
bool allTrue() const { return 0xffff == _mm_movemask_epi8(_mm_castpd_si128(fVec)); }
bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(_mm_castpd_si128(fVec)); }
__m128d fVec;
};
template <>
class SkNi<4, int> {
public:
@@ -148,7 +101,7 @@ public:
};
template <>
class SkNf<4, float> {
class SkNf<4> {
public:
SkNf(const __m128& vec) : fVec(vec) {}
@@ -179,8 +132,6 @@ public:
*(int*)bytes = _mm_cvtsi128_si32(fix8);
}
SkNi<4, int> castTrunc() const { return _mm_cvttps_epi32(fVec); }
SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); }
SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); }
SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); }

@@ -10,12 +10,12 @@
#include "SkRandom.h"
#include "Test.h"
template <int N, typename T>
template <int N>
static void test_Nf(skiatest::Reporter* r) {
auto assert_nearly_eq = [&](double eps, const SkNf<N,T>& v, T a, T b, T c, T d) {
auto close = [=](T a, T b) { return fabs(a-b) <= eps; };
T vals[4];
auto assert_nearly_eq = [&](float eps, const SkNf<N>& v, float a, float b, float c, float d) {
auto close = [=](float a, float b) { return fabsf(a-b) <= eps; };
float vals[4];
v.store(vals);
bool ok = close(vals[0], a) && close(vals[1], b)
&& close(v.template kth<0>(), a) && close(v.template kth<1>(), b);
@@ -26,15 +26,15 @@ static void test_Nf(skiatest::Reporter* r) {
REPORTER_ASSERT(r, ok);
}
};
auto assert_eq = [&](const SkNf<N,T>& v, T a, T b, T c, T d) {
auto assert_eq = [&](const SkNf<N>& v, float a, float b, float c, float d) {
return assert_nearly_eq(0, v, a,b,c,d);
};
T vals[] = {3, 4, 5, 6};
SkNf<N,T> a = SkNf<N,T>::Load(vals),
b(a),
c = a;
SkNf<N,T> d;
float vals[] = {3, 4, 5, 6};
SkNf<N> a = SkNf<N>::Load(vals),
b(a),
c = a;
SkNf<N> d;
d = a;
assert_eq(a, 3, 4, 5, 6);
@@ -47,20 +47,20 @@ static void test_Nf(skiatest::Reporter* r) {
assert_eq(a*b-b, 6, 12, 20, 30);
assert_eq((a*b).sqrt(), 3, 4, 5, 6);
assert_eq(a/b, 1, 1, 1, 1);
assert_eq(SkNf<N,T>(0)-a, -3, -4, -5, -6);
assert_eq(SkNf<N>(0)-a, -3, -4, -5, -6);
SkNf<N,T> fours(4);
SkNf<N> fours(4);
assert_eq(fours.sqrt(), 2,2,2,2);
assert_nearly_eq(0.001, fours.rsqrt0(), 0.5, 0.5, 0.5, 0.5);
assert_nearly_eq(0.001, fours.rsqrt1(), 0.5, 0.5, 0.5, 0.5);
assert_nearly_eq(0.001, fours.rsqrt2(), 0.5, 0.5, 0.5, 0.5);
assert_nearly_eq(0.001f, fours.rsqrt0(), 0.5, 0.5, 0.5, 0.5);
assert_nearly_eq(0.001f, fours.rsqrt1(), 0.5, 0.5, 0.5, 0.5);
assert_nearly_eq(0.001f, fours.rsqrt2(), 0.5, 0.5, 0.5, 0.5);
assert_eq( fours. invert(), 0.25, 0.25, 0.25, 0.25);
assert_nearly_eq(0.001, fours.approxInvert(), 0.25, 0.25, 0.25, 0.25);
assert_eq( fours. invert(), 0.25, 0.25, 0.25, 0.25);
assert_nearly_eq(0.001f, fours.approxInvert(), 0.25, 0.25, 0.25, 0.25);
assert_eq(SkNf<N,T>::Min(a, fours), 3, 4, 4, 4);
assert_eq(SkNf<N,T>::Max(a, fours), 4, 4, 5, 6);
assert_eq(SkNf<N>::Min(a, fours), 3, 4, 4, 4);
assert_eq(SkNf<N>::Max(a, fours), 4, 4, 5, 6);
// Test some comparisons. This is not exhaustive.
REPORTER_ASSERT(r, (a == b).allTrue());
@@ -75,11 +75,8 @@ static void test_Nf(skiatest::Reporter* r) {
}
DEF_TEST(SkNf, r) {
test_Nf<2, float>(r);
test_Nf<2, double>(r);
test_Nf<4, float>(r);
test_Nf<4, double>(r);
test_Nf<2>(r);
test_Nf<4>(r);
}
template <int N, typename T>