Add missing methods to neon/sse SkNx implementations
Adds negate, abs, sqrt to Sk2f and/or Sk4f.

Bug: skia:
Change-Id: I0688dae45b32ff94abcc0525ef1f09d666f9c6e9
Reviewed-on: https://skia-review.googlesource.com/39642
Reviewed-by: Mike Klein <mtklein@chromium.org>
Commit-Queue: Chris Dalton <csmartdalton@google.com>
This commit is contained in:
parent
08133583d5
commit
7732f4f8f2
@ -40,6 +40,8 @@ public:
|
||||
return est1;
|
||||
}
|
||||
|
||||
// Unary minus: flips the sign of both lanes.
AI SkNx operator - () const {
    const float32x2_t negated = vneg_f32(fVec);
    return negated;
}
|
||||
|
||||
// Lane-wise sum of this vector and `o`.
AI SkNx operator + (const SkNx& o) const {
    const float32x2_t sum = vadd_f32(fVec, o.fVec);
    return sum;
}
|
||||
// Lane-wise difference: this vector minus `o`.
AI SkNx operator - (const SkNx& o) const {
    const float32x2_t difference = vsub_f32(fVec, o.fVec);
    return difference;
}
|
||||
// Lane-wise product of this vector and `o`.
AI SkNx operator * (const SkNx& o) const {
    const float32x2_t product = vmul_f32(fVec, o.fVec);
    return product;
}
|
||||
@ -66,6 +68,8 @@ public:
|
||||
// Lane-wise minimum of `l` and `r`.
AI static SkNx Min(const SkNx& l, const SkNx& r) {
    const float32x2_t smaller = vmin_f32(l.fVec, r.fVec);
    return smaller;
}
|
||||
// Lane-wise maximum of `l` and `r`.
AI static SkNx Max(const SkNx& l, const SkNx& r) {
    const float32x2_t larger = vmax_f32(l.fVec, r.fVec);
    return larger;
}
|
||||
|
||||
// Lane-wise absolute value.
AI SkNx abs() const {
    const float32x2_t magnitude = vabs_f32(fVec);
    return magnitude;
}
|
||||
|
||||
AI SkNx rsqrt() const {
|
||||
float32x2_t est0 = vrsqrte_f32(fVec);
|
||||
return vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0);
|
||||
@ -97,6 +101,10 @@ public:
|
||||
return vget_lane_u32(v,0) || vget_lane_u32(v,1);
|
||||
}
|
||||
|
||||
// Treats this vector's bits as a selection mask: for every set bit the
// result takes the corresponding bit of `t`, otherwise the bit of `e`.
// NOTE(review): presumably called on comparison results, where each lane is
// all-ones or all-zeros — confirm against callers.
AI SkNx thenElse(const SkNx& t, const SkNx& e) const {
    const uint32x2_t mask = vreinterpret_u32_f32(fVec);
    return vbsl_f32(mask, t.fVec, e.fVec);
}
|
||||
|
||||
float32x2_t fVec;
|
||||
};
|
||||
|
||||
@ -135,6 +143,8 @@ public:
|
||||
return est1;
|
||||
}
|
||||
|
||||
// Unary minus: flips the sign of all four lanes.
AI SkNx operator - () const {
    const float32x4_t negated = vnegq_f32(fVec);
    return negated;
}
|
||||
|
||||
// Lane-wise sum of this vector and `o`.
AI SkNx operator + (const SkNx& o) const {
    const float32x4_t sum = vaddq_f32(fVec, o.fVec);
    return sum;
}
|
||||
// Lane-wise difference: this vector minus `o`.
AI SkNx operator - (const SkNx& o) const {
    const float32x4_t difference = vsubq_f32(fVec, o.fVec);
    return difference;
}
|
||||
// Lane-wise product of this vector and `o`.
AI SkNx operator * (const SkNx& o) const {
    const float32x4_t product = vmulq_f32(fVec, o.fVec);
    return product;
}
|
||||
|
@ -29,6 +29,8 @@ public:
|
||||
|
||||
// Writes the low two floats (64 bits) of fVec to `ptr`.
AI void store(void* ptr) const {
    __m64* dst = static_cast<__m64*>(ptr);
    _mm_storel_pi(dst, fVec);
}
|
||||
|
||||
// Unary minus: XOR with -0.0f toggles only the sign bit of each lane.
AI SkNx operator - () const {
    const __m128 signBits = _mm_set1_ps(-0.0f);
    return _mm_xor_ps(signBits, fVec);
}
|
||||
|
||||
// Lane-wise sum of this vector and `o`.
AI SkNx operator + (const SkNx& o) const {
    const __m128 sum = _mm_add_ps(fVec, o.fVec);
    return sum;
}
|
||||
// Lane-wise difference: this vector minus `o`.
AI SkNx operator - (const SkNx& o) const {
    const __m128 difference = _mm_sub_ps(fVec, o.fVec);
    return difference;
}
|
||||
// Lane-wise product of this vector and `o`.
AI SkNx operator * (const SkNx& o) const {
    const __m128 product = _mm_mul_ps(fVec, o.fVec);
    return product;
}
|
||||
@ -44,6 +46,7 @@ public:
|
||||
// Lane-wise minimum of `l` and `r`.
AI static SkNx Min(const SkNx& l, const SkNx& r) {
    const __m128 smaller = _mm_min_ps(l.fVec, r.fVec);
    return smaller;
}
|
||||
// Lane-wise maximum of `l` and `r`.
AI static SkNx Max(const SkNx& l, const SkNx& r) {
    const __m128 larger = _mm_max_ps(l.fVec, r.fVec);
    return larger;
}
|
||||
|
||||
// Lane-wise absolute value: AND-NOT with -0.0f clears only the sign bit.
AI SkNx abs() const {
    const __m128 signBits = _mm_set1_ps(-0.0f);
    return _mm_andnot_ps(signBits, fVec);
}
|
||||
// Lane-wise square root.
AI SkNx sqrt() const {
    const __m128 root = _mm_sqrt_ps(fVec);
    return root;
}
|
||||
// Lane-wise reciprocal square root; _mm_rsqrt_ps yields a hardware estimate,
// not an exactly rounded result.
AI SkNx rsqrt() const {
    const __m128 estimate = _mm_rsqrt_ps(fVec);
    return estimate;
}
|
||||
// Lane-wise reciprocal; _mm_rcp_ps yields a hardware estimate, not an
// exactly rounded result.
AI SkNx invert() const {
    const __m128 estimate = _mm_rcp_ps(fVec);
    return estimate;
}
|
||||
@ -57,6 +60,15 @@ public:
|
||||
// True when the top bit of every one of the low 8 bytes (the two float
// lanes in use) is set.
AI bool allTrue() const {
    const int byteSigns = _mm_movemask_epi8(_mm_castps_si128(fVec));
    const int lowEight  = byteSigns & 0xff;
    return lowEight == 0xff;
}
|
||||
// True when the top bit of at least one of the low 8 bytes (the two float
// lanes in use) is set.
AI bool anyTrue() const {
    const int byteSigns = _mm_movemask_epi8(_mm_castps_si128(fVec));
    const int lowEight  = byteSigns & 0xff;
    return lowEight != 0x00;
}
|
||||
|
||||
// Uses this vector as a selection mask between `t` (mask set) and `e`
// (mask clear).
// NOTE(review): the SSE4.1 path keys off each lane's top bit only, while the
// fallback selects bit by bit; the two agree when every lane is all-ones or
// all-zeros, which is presumably what callers pass — confirm.
AI SkNx thenElse(const SkNx& t, const SkNx& e) const {
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
    const __m128 blended = _mm_blendv_ps(e.fVec, t.fVec, fVec);
    return blended;
#else
    const __m128 fromThen = _mm_and_ps   (fVec, t.fVec);
    const __m128 fromElse = _mm_andnot_ps(fVec, e.fVec);
    return _mm_or_ps(fromThen, fromElse);
#endif
}
|
||||
|
||||
__m128 fVec;
|
||||
};
|
||||
|
||||
@ -95,6 +107,8 @@ public:
|
||||
_mm_storeu_ps(((float*) dst) + 12, v3);
|
||||
}
|
||||
|
||||
// Unary minus: XOR with -0.0f toggles only the sign bit of each lane.
AI SkNx operator - () const {
    const __m128 signBits = _mm_set1_ps(-0.0f);
    return _mm_xor_ps(signBits, fVec);
}
|
||||
|
||||
// Lane-wise sum of this vector and `o`.
AI SkNx operator + (const SkNx& o) const {
    const __m128 sum = _mm_add_ps(fVec, o.fVec);
    return sum;
}
|
||||
// Lane-wise difference: this vector minus `o`.
AI SkNx operator - (const SkNx& o) const {
    const __m128 difference = _mm_sub_ps(fVec, o.fVec);
    return difference;
}
|
||||
// Lane-wise product of this vector and `o`.
AI SkNx operator * (const SkNx& o) const {
    const __m128 product = _mm_mul_ps(fVec, o.fVec);
    return product;
}
|
||||
|
@ -210,6 +210,12 @@ DEF_TEST(SkNx_abs, r) {
|
||||
REPORTER_ASSERT(r, fs[1] == 0.0f);
|
||||
REPORTER_ASSERT(r, fs[2] == 2.0f);
|
||||
REPORTER_ASSERT(r, fs[3] == 4.0f);
|
||||
auto fshi = Sk2f(0.0f, -0.0f).abs();
|
||||
auto fslo = Sk2f(2.0f, -4.0f).abs();
|
||||
REPORTER_ASSERT(r, fshi[0] == 0.0f);
|
||||
REPORTER_ASSERT(r, fshi[1] == 0.0f);
|
||||
REPORTER_ASSERT(r, fslo[0] == 2.0f);
|
||||
REPORTER_ASSERT(r, fslo[1] == 4.0f);
|
||||
}
|
||||
|
||||
DEF_TEST(Sk4i_abs, r) {
|
||||
@ -358,3 +364,31 @@ DEF_TEST(SkNx_4fLoad4Store4, r) {
|
||||
Sk4f::Store4(dst, a, b, c, d);
|
||||
REPORTER_ASSERT(r, 0 == memcmp(dst, src, 16 * sizeof(float)));
|
||||
}
|
||||
|
||||
// Exercises unary minus on Sk4f and Sk2f with the inputs 0, -0, 2 and -4.
// Note that -0.0f == 0.0f compares equal in C++, so the zero-lane checks
// pass for either sign of zero; only the +/-2 and +/-4 lanes pin the sign.
DEF_TEST(SkNx_neg, r) {
|
||||
// Four-lane case.
auto fs = -Sk4f(0.0f, -0.0f, 2.0f, -4.0f);
|
||||
REPORTER_ASSERT(r, fs[0] == 0.0f);
|
||||
REPORTER_ASSERT(r, fs[1] == 0.0f);
|
||||
REPORTER_ASSERT(r, fs[2] == -2.0f);
|
||||
REPORTER_ASSERT(r, fs[3] == 4.0f);
|
||||
// Two-lane case: the same four inputs split across two Sk2f values.
auto fshi = -Sk2f(0.0f, -0.0f);
|
||||
auto fslo = -Sk2f(2.0f, -4.0f);
|
||||
REPORTER_ASSERT(r, fshi[0] == 0.0f);
|
||||
REPORTER_ASSERT(r, fshi[1] == 0.0f);
|
||||
REPORTER_ASSERT(r, fslo[0] == -2.0f);
|
||||
REPORTER_ASSERT(r, fslo[1] == 4.0f);
|
||||
}
|
||||
|
||||
// Exercises thenElse() on the result of a `< 0` comparison for Sk4f and Sk2f:
// lanes where the comparison holds are expected to yield -1, all others 1.
// Of the inputs 0, -0, 2 and -4, only -4 is expected to select -1.
DEF_TEST(SkNx_thenElse, r) {
|
||||
// Four-lane case.
auto fs = (Sk4f(0.0f, -0.0f, 2.0f, -4.0f) < 0).thenElse(-1, 1);
|
||||
REPORTER_ASSERT(r, fs[0] == 1);
|
||||
REPORTER_ASSERT(r, fs[1] == 1);
|
||||
REPORTER_ASSERT(r, fs[2] == 1);
|
||||
REPORTER_ASSERT(r, fs[3] == -1);
|
||||
// Two-lane case: the same four inputs split across two Sk2f values.
auto fshi = (Sk2f(0.0f, -0.0f) < 0).thenElse(-1, 1);
|
||||
auto fslo = (Sk2f(2.0f, -4.0f) < 0).thenElse(-1, 1);
|
||||
REPORTER_ASSERT(r, fshi[0] == 1);
|
||||
REPORTER_ASSERT(r, fshi[1] == 1);
|
||||
REPORTER_ASSERT(r, fslo[0] == 1);
|
||||
REPORTER_ASSERT(r, fslo[1] == -1);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user