Add missing methods to neon/sse SkNx implementations

Adds negate, abs, sqrt, and thenElse to Sk2f and/or Sk4f.

Bug: skia:
Change-Id: I0688dae45b32ff94abcc0525ef1f09d666f9c6e9
Reviewed-on: https://skia-review.googlesource.com/39642
Reviewed-by: Mike Klein <mtklein@chromium.org>
Commit-Queue: Chris Dalton <csmartdalton@google.com>
This commit is contained in:
Chris Dalton 2017-08-28 14:45:40 -06:00 committed by Skia Commit-Bot
parent 08133583d5
commit 7732f4f8f2
3 changed files with 58 additions and 0 deletions

View File

@ -40,6 +40,8 @@ public:
return est1;
}
// Lane-wise arithmetic on the 64-bit NEON float vector held in fVec.
// Unary minus: negates every lane via vneg_f32.
AI SkNx operator - () const { return vneg_f32(fVec); }
// Binary +, -, *: combine this vector with `o` lane by lane.
AI SkNx operator + (const SkNx& o) const { return vadd_f32(fVec, o.fVec); }
AI SkNx operator - (const SkNx& o) const { return vsub_f32(fVec, o.fVec); }
AI SkNx operator * (const SkNx& o) const { return vmul_f32(fVec, o.fVec); }
@ -66,6 +68,8 @@ public:
// Lane-wise minimum / maximum of `l` and `r`.
AI static SkNx Min(const SkNx& l, const SkNx& r) { return vmin_f32(l.fVec, r.fVec); }
AI static SkNx Max(const SkNx& l, const SkNx& r) { return vmax_f32(l.fVec, r.fVec); }
// Lane-wise absolute value.
AI SkNx abs() const { return vabs_f32(fVec); }
AI SkNx rsqrt() const {
float32x2_t est0 = vrsqrte_f32(fVec);
return vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0);
@ -97,6 +101,10 @@ public:
return vget_lane_u32(v,0) || vget_lane_u32(v,1);
}
// Treats this vector as a bit mask and blends `t` and `e` with it:
// wherever a mask bit is set the result takes the bit from `t`,
// otherwise it takes the bit from `e` (NEON bitwise select).
AI SkNx thenElse(const SkNx& t, const SkNx& e) const {
    // vbsl wants an integer mask, so reinterpret our float lanes as raw bits.
    const uint32x2_t mask = vreinterpret_u32_f32(fVec);
    return vbsl_f32(mask, t.fVec, e.fVec);
}
float32x2_t fVec;
};
@ -135,6 +143,8 @@ public:
return est1;
}
// Lane-wise arithmetic on the 128-bit NEON float vector held in fVec
// (the "q" intrinsics operate on quad-word registers).
// Unary minus: negates every lane via vnegq_f32.
AI SkNx operator - () const { return vnegq_f32(fVec); }
// Binary +, -, *: combine this vector with `o` lane by lane.
AI SkNx operator + (const SkNx& o) const { return vaddq_f32(fVec, o.fVec); }
AI SkNx operator - (const SkNx& o) const { return vsubq_f32(fVec, o.fVec); }
AI SkNx operator * (const SkNx& o) const { return vmulq_f32(fVec, o.fVec); }

View File

@ -29,6 +29,8 @@ public:
// Writes only the low two floats (64 bits) of fVec to `ptr`; the upper half of the register is not stored.
AI void store(void* ptr) const { _mm_storel_pi((__m64*)ptr, fVec); }
// Unary minus: -0.0f has only the sign bit set, so XOR-ing with it flips the
// sign of every lane without touching exponent or mantissa.
AI SkNx operator - () const { return _mm_xor_ps(_mm_set1_ps(-0.0f), fVec); }
// Binary +, -, *: combine this vector with `o` lane by lane.
AI SkNx operator + (const SkNx& o) const { return _mm_add_ps(fVec, o.fVec); }
AI SkNx operator - (const SkNx& o) const { return _mm_sub_ps(fVec, o.fVec); }
AI SkNx operator * (const SkNx& o) const { return _mm_mul_ps(fVec, o.fVec); }
@ -44,6 +46,7 @@ public:
// Lane-wise minimum / maximum of `l` and `r`.
AI static SkNx Min(const SkNx& l, const SkNx& r) { return _mm_min_ps(l.fVec, r.fVec); }
AI static SkNx Max(const SkNx& l, const SkNx& r) { return _mm_max_ps(l.fVec, r.fVec); }
// Absolute value: andnot computes (~mask) & fVec, and the mask (-0.0f) is just
// the sign bit, so this clears the sign of every lane.
AI SkNx abs() const { return _mm_andnot_ps(_mm_set1_ps(-0.0f), fVec); }
// Lane-wise square root.
AI SkNx sqrt() const { return _mm_sqrt_ps (fVec); }
// Lane-wise reciprocal square root; _mm_rsqrt_ps is a hardware approximation, not an exact result.
AI SkNx rsqrt() const { return _mm_rsqrt_ps(fVec); }
// Lane-wise reciprocal; _mm_rcp_ps is likewise a hardware approximation.
AI SkNx invert() const { return _mm_rcp_ps(fVec); }
@ -57,6 +60,15 @@ public:
// _mm_movemask_epi8 collects the top bit of each of the 16 bytes in the register.
// Masking with 0xff keeps only the 8 bytes that make up the low two float lanes,
// so the upper half of the register never influences the answer.
// allTrue: the top bit of every one of those 8 bytes is set.
AI bool allTrue() const { return 0xff == (_mm_movemask_epi8(_mm_castps_si128(fVec)) & 0xff); }
// anyTrue: the top bit of at least one of those 8 bytes is set.
AI bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(_mm_castps_si128(fVec)) & 0xff); }
// Uses this vector as a per-lane mask to choose between `t` (mask set) and
// `e` (mask clear).
//
// The SSE4.1 path uses blendv, which inspects only the sign bit of each mask
// lane. The fallback blends bit-by-bit as (mask & t) | (~mask & e). The two
// agree whenever every mask lane is all ones or all zeros.
// NOTE(review): presumably that is what the comparison operators produce;
// they are not visible in this hunk, so confirm.
AI SkNx thenElse(const SkNx& t, const SkNx& e) const {
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
    return _mm_blendv_ps(e.fVec, t.fVec, fVec);
#else
    const __m128 fromThen = _mm_and_ps(fVec, t.fVec);     // bits of t where the mask is set
    const __m128 fromElse = _mm_andnot_ps(fVec, e.fVec);  // bits of e where the mask is clear
    return _mm_or_ps(fromThen, fromElse);
#endif
}
__m128 fVec;
};
@ -95,6 +107,8 @@ public:
_mm_storeu_ps(((float*) dst) + 12, v3);
}
// Unary minus: -0.0f has only the sign bit set, so XOR-ing with it flips the
// sign of every lane without touching exponent or mantissa.
AI SkNx operator - () const { return _mm_xor_ps(_mm_set1_ps(-0.0f), fVec); }
// Binary +, -, *: combine this vector with `o` lane by lane.
AI SkNx operator + (const SkNx& o) const { return _mm_add_ps(fVec, o.fVec); }
AI SkNx operator - (const SkNx& o) const { return _mm_sub_ps(fVec, o.fVec); }
AI SkNx operator * (const SkNx& o) const { return _mm_mul_ps(fVec, o.fVec); }

View File

@ -210,6 +210,12 @@ DEF_TEST(SkNx_abs, r) {
REPORTER_ASSERT(r, fs[1] == 0.0f);
REPORTER_ASSERT(r, fs[2] == 2.0f);
REPORTER_ASSERT(r, fs[3] == 4.0f);
auto fshi = Sk2f(0.0f, -0.0f).abs();
auto fslo = Sk2f(2.0f, -4.0f).abs();
REPORTER_ASSERT(r, fshi[0] == 0.0f);
REPORTER_ASSERT(r, fshi[1] == 0.0f);
REPORTER_ASSERT(r, fslo[0] == 2.0f);
REPORTER_ASSERT(r, fslo[1] == 4.0f);
}
DEF_TEST(Sk4i_abs, r) {
@ -358,3 +364,31 @@ DEF_TEST(SkNx_4fLoad4Store4, r) {
Sk4f::Store4(dst, a, b, c, d);
REPORTER_ASSERT(r, 0 == memcmp(dst, src, 16 * sizeof(float)));
}
// Checks unary minus on Sk4f and Sk2f against the same four inputs
// (0, -0, 2, -4). The Sk2f case feeds those inputs as two separate pairs.
// Note: +0 and -0 compare equal, so the zero lanes only verify that the
// result is still a zero, not which sign it carries.
DEF_TEST(SkNx_neg, r) {
    // Four lanes at once.
    auto negated4 = -Sk4f(0.0f, -0.0f, 2.0f, -4.0f);
    REPORTER_ASSERT(r, negated4[0] == 0.0f);
    REPORTER_ASSERT(r, negated4[1] == 0.0f);
    REPORTER_ASSERT(r, negated4[2] == -2.0f);
    REPORTER_ASSERT(r, negated4[3] == 4.0f);

    // Two lanes: the zero pair.
    auto negatedZeros = -Sk2f(0.0f, -0.0f);
    REPORTER_ASSERT(r, negatedZeros[0] == 0.0f);
    REPORTER_ASSERT(r, negatedZeros[1] == 0.0f);

    // Two lanes: the non-zero pair.
    auto negatedNonZeros = -Sk2f(2.0f, -4.0f);
    REPORTER_ASSERT(r, negatedNonZeros[0] == -2.0f);
    REPORTER_ASSERT(r, negatedNonZeros[1] == 4.0f);
}
// Checks thenElse on Sk4f and Sk2f: the mask is "lane < 0", selecting -1
// where it holds and 1 where it does not. Neither 0 nor -0 is less than
// zero, so only the -4 lane is expected to come back as -1.
DEF_TEST(SkNx_thenElse, r) {
    // Four lanes at once.
    auto chosen4 = (Sk4f(0.0f, -0.0f, 2.0f, -4.0f) < 0).thenElse(-1, 1);
    REPORTER_ASSERT(r, chosen4[0] == 1);
    REPORTER_ASSERT(r, chosen4[1] == 1);
    REPORTER_ASSERT(r, chosen4[2] == 1);
    REPORTER_ASSERT(r, chosen4[3] == -1);

    // Two lanes: the zero pair.
    auto chosenZeros = (Sk2f(0.0f, -0.0f) < 0).thenElse(-1, 1);
    REPORTER_ASSERT(r, chosenZeros[0] == 1);
    REPORTER_ASSERT(r, chosenZeros[1] == 1);

    // Two lanes: the non-zero pair.
    auto chosenNonZeros = (Sk2f(2.0f, -4.0f) < 0).thenElse(-1, 1);
    REPORTER_ASSERT(r, chosenNonZeros[0] == 1);
    REPORTER_ASSERT(r, chosenNonZeros[1] == -1);
}