From 7732f4f8f2536688164f45ac329f5268759b4b26 Mon Sep 17 00:00:00 2001 From: Chris Dalton Date: Mon, 28 Aug 2017 14:45:40 -0600 Subject: [PATCH] Add missing methods to neon/sse SkNx implementations Adds negate, abs, thenElse to Sk2f and/or Sk4f. Bug: skia: Change-Id: I0688dae45b32ff94abcc0525ef1f09d666f9c6e9 Reviewed-on: https://skia-review.googlesource.com/39642 Reviewed-by: Mike Klein Commit-Queue: Chris Dalton --- src/opts/SkNx_neon.h | 10 ++++++++++ src/opts/SkNx_sse.h | 14 ++++++++++++++ tests/SkNxTest.cpp | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+) diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h index 5ec3dc2f2b..4d7aefc93f 100644 --- a/src/opts/SkNx_neon.h +++ b/src/opts/SkNx_neon.h @@ -40,6 +40,8 @@ public: return est1; } + AI SkNx operator - () const { return vneg_f32(fVec); } + AI SkNx operator + (const SkNx& o) const { return vadd_f32(fVec, o.fVec); } AI SkNx operator - (const SkNx& o) const { return vsub_f32(fVec, o.fVec); } AI SkNx operator * (const SkNx& o) const { return vmul_f32(fVec, o.fVec); } @@ -66,6 +68,8 @@ public: AI static SkNx Min(const SkNx& l, const SkNx& r) { return vmin_f32(l.fVec, r.fVec); } AI static SkNx Max(const SkNx& l, const SkNx& r) { return vmax_f32(l.fVec, r.fVec); } + AI SkNx abs() const { return vabs_f32(fVec); } + AI SkNx rsqrt() const { float32x2_t est0 = vrsqrte_f32(fVec); return vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0); @@ -97,6 +101,10 @@ public: return vget_lane_u32(v,0) || vget_lane_u32(v,1); } + AI SkNx thenElse(const SkNx& t, const SkNx& e) const { + return vbsl_f32(vreinterpret_u32_f32(fVec), t.fVec, e.fVec); + } + float32x2_t fVec; }; @@ -135,6 +143,8 @@ public: return est1; } + AI SkNx operator - () const { return vnegq_f32(fVec); } + AI SkNx operator + (const SkNx& o) const { return vaddq_f32(fVec, o.fVec); } AI SkNx operator - (const SkNx& o) const { return vsubq_f32(fVec, o.fVec); } AI SkNx operator * (const SkNx& o) const { return vmulq_f32(fVec, o.fVec); } 
diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h index 54c95970aa..415d6e1205 100644 --- a/src/opts/SkNx_sse.h +++ b/src/opts/SkNx_sse.h @@ -29,6 +29,8 @@ public: AI void store(void* ptr) const { _mm_storel_pi((__m64*)ptr, fVec); } + AI SkNx operator - () const { return _mm_xor_ps(_mm_set1_ps(-0.0f), fVec); } + AI SkNx operator + (const SkNx& o) const { return _mm_add_ps(fVec, o.fVec); } AI SkNx operator - (const SkNx& o) const { return _mm_sub_ps(fVec, o.fVec); } AI SkNx operator * (const SkNx& o) const { return _mm_mul_ps(fVec, o.fVec); } @@ -44,6 +46,7 @@ public: AI static SkNx Min(const SkNx& l, const SkNx& r) { return _mm_min_ps(l.fVec, r.fVec); } AI static SkNx Max(const SkNx& l, const SkNx& r) { return _mm_max_ps(l.fVec, r.fVec); } + AI SkNx abs() const { return _mm_andnot_ps(_mm_set1_ps(-0.0f), fVec); } AI SkNx sqrt() const { return _mm_sqrt_ps (fVec); } AI SkNx rsqrt() const { return _mm_rsqrt_ps(fVec); } AI SkNx invert() const { return _mm_rcp_ps(fVec); } @@ -57,6 +60,15 @@ public: AI bool allTrue() const { return 0xff == (_mm_movemask_epi8(_mm_castps_si128(fVec)) & 0xff); } AI bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(_mm_castps_si128(fVec)) & 0xff); } + AI SkNx thenElse(const SkNx& t, const SkNx& e) const { + #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 + return _mm_blendv_ps(e.fVec, t.fVec, fVec); + #else + return _mm_or_ps(_mm_and_ps (fVec, t.fVec), + _mm_andnot_ps(fVec, e.fVec)); + #endif + } + __m128 fVec; }; @@ -95,6 +107,8 @@ public: _mm_storeu_ps(((float*) dst) + 12, v3); } + AI SkNx operator - () const { return _mm_xor_ps(_mm_set1_ps(-0.0f), fVec); } + AI SkNx operator + (const SkNx& o) const { return _mm_add_ps(fVec, o.fVec); } AI SkNx operator - (const SkNx& o) const { return _mm_sub_ps(fVec, o.fVec); } AI SkNx operator * (const SkNx& o) const { return _mm_mul_ps(fVec, o.fVec); } diff --git a/tests/SkNxTest.cpp b/tests/SkNxTest.cpp index e3f3cd6f3f..240d7e001b 100644 --- a/tests/SkNxTest.cpp +++ b/tests/SkNxTest.cpp @@ 
-210,6 +210,12 @@ DEF_TEST(SkNx_abs, r) { REPORTER_ASSERT(r, fs[1] == 0.0f); REPORTER_ASSERT(r, fs[2] == 2.0f); REPORTER_ASSERT(r, fs[3] == 4.0f); + auto fshi = Sk2f(0.0f, -0.0f).abs(); + auto fslo = Sk2f(2.0f, -4.0f).abs(); + REPORTER_ASSERT(r, fshi[0] == 0.0f); + REPORTER_ASSERT(r, fshi[1] == 0.0f); + REPORTER_ASSERT(r, fslo[0] == 2.0f); + REPORTER_ASSERT(r, fslo[1] == 4.0f); } DEF_TEST(Sk4i_abs, r) { @@ -358,3 +364,31 @@ DEF_TEST(SkNx_4fLoad4Store4, r) { Sk4f::Store4(dst, a, b, c, d); REPORTER_ASSERT(r, 0 == memcmp(dst, src, 16 * sizeof(float))); } + +DEF_TEST(SkNx_neg, r) { + auto fs = -Sk4f(0.0f, -0.0f, 2.0f, -4.0f); + REPORTER_ASSERT(r, fs[0] == 0.0f); + REPORTER_ASSERT(r, fs[1] == 0.0f); + REPORTER_ASSERT(r, fs[2] == -2.0f); + REPORTER_ASSERT(r, fs[3] == 4.0f); + auto fshi = -Sk2f(0.0f, -0.0f); + auto fslo = -Sk2f(2.0f, -4.0f); + REPORTER_ASSERT(r, fshi[0] == 0.0f); + REPORTER_ASSERT(r, fshi[1] == 0.0f); + REPORTER_ASSERT(r, fslo[0] == -2.0f); + REPORTER_ASSERT(r, fslo[1] == 4.0f); +} + +DEF_TEST(SkNx_thenElse, r) { + auto fs = (Sk4f(0.0f, -0.0f, 2.0f, -4.0f) < 0).thenElse(-1, 1); + REPORTER_ASSERT(r, fs[0] == 1); + REPORTER_ASSERT(r, fs[1] == 1); + REPORTER_ASSERT(r, fs[2] == 1); + REPORTER_ASSERT(r, fs[3] == -1); + auto fshi = (Sk2f(0.0f, -0.0f) < 0).thenElse(-1, 1); + auto fslo = (Sk2f(2.0f, -4.0f) < 0).thenElse(-1, 1); + REPORTER_ASSERT(r, fshi[0] == 1); + REPORTER_ASSERT(r, fshi[1] == 1); + REPORTER_ASSERT(r, fslo[0] == 1); + REPORTER_ASSERT(r, fslo[1] == -1); +}