Implement Sk4i's abs, min, max

CQ_INCLUDE_TRYBOTS=skia.primary:Test-Debian9-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD
Bug: skia:
Change-Id: Ia9ec3f72095e1c744f88df7bb990d99e0f87d578
Reviewed-on: https://skia-review.googlesource.com/22720
Commit-Queue: Yuqian Li <liyuqian@google.com>
Reviewed-by: Herb Derby <herb@google.com>
This commit is contained in:
Yuqian Li 2017-07-12 13:36:05 -04:00 committed by Skia Commit-Bot
parent f0ca0e0844
commit 7da6ba2d63
4 changed files with 55 additions and 0 deletions

View File

@ -218,6 +218,8 @@ struct SkNx<1,T> {
private:
// Helper functions to choose the right float/double methods. (In <cmath> madness lies...)
// Scalar absolute value of an int.
// The negation is carried out in unsigned arithmetic: plain `-val` is a signed overflow
// (undefined behavior) when val is the most negative int, whereas `0u - unsigned(val)` is
// well defined and wraps, so that single input maps back to itself instead of invoking UB.
AI static int Abs(int val) {
    return val < 0 ? static_cast<int>(0u - static_cast<unsigned>(val)) : val;
}
// Float overload of Abs: forwards to the single-precision C library routine ::fabsf.
AI static float Abs(float val) {
    const float magnitude = ::fabsf(val);
    return magnitude;
}
// Square root of a float, computed with the single-precision C library routine ::sqrtf.
AI static float Sqrt(float val) {
    const float root = ::sqrtf(val);
    return root;
}
// Rounds a float toward negative infinity using the single-precision routine ::floorf.
AI static float Floor(float val) {
    const float lowered = ::floorf(val);
    return lowered;
}

View File

@ -433,12 +433,15 @@ public:
}
// Lane-wise signed minimum of two vectors, via the NEON intrinsic vminq_s32.
AI static SkNx Min(const SkNx& a, const SkNx& b) {
    const int32x4_t smaller = vminq_s32(a.fVec, b.fVec);
    return smaller;
}
// Lane-wise signed maximum of two vectors, via the NEON intrinsic vmaxq_s32.
AI static SkNx Max(const SkNx& a, const SkNx& b) {
    const int32x4_t larger = vmaxq_s32(a.fVec, b.fVec);
    return larger;
}
// TODO as needed
// Uses this vector as a selection mask: the bits of fVec are reinterpreted as unsigned and
// handed to vbslq_s32, which picks from t where the mask is set and from e elsewhere.
AI SkNx thenElse(const SkNx& t, const SkNx& e) const {
    const uint32x4_t selector = vreinterpretq_u32_s32(fVec);
    return vbslq_s32(selector, t.fVec, e.fVec);
}
// Lane-wise absolute value, via the NEON intrinsic vabsq_s32.
AI SkNx abs() const {
    const int32x4_t magnitudes = vabsq_s32(fVec);
    return magnitudes;
}
int32x4_t fVec;
};

View File

@ -198,6 +198,37 @@ public:
#endif
}
// Lane-wise absolute value.
// With SSSE3 or later this is a single _mm_abs_epi32; otherwise it is built from the
// sign-mask identity abs(v) == (v ^ s) - s, where s is v shifted right by 31.
AI SkNx abs() const {
#if SK_CPU_SSE_LEVEL < SK_CPU_SSE_LEVEL_SSSE3
    SkNx sign = (*this) >> 31;
    SkNx flipped = sign ^ (*this);
    return flipped - sign;
#else
    return _mm_abs_epi32(fVec);
#endif
}
// Lane-wise signed minimum of x and y.
// SSE4.1 and later provide _mm_min_epi32 directly; earlier levels emulate it with a
// compare followed by a mask-based select.
AI static SkNx Min(const SkNx& x, const SkNx& y) {
#if SK_CPU_SSE_LEVEL < SK_CPU_SSE_LEVEL_SSE41
    // All-ones in every lane where x < y (written as y > x), zero elsewhere.
    const __m128i xIsSmaller = _mm_cmpgt_epi32(y.fVec, x.fVec);
    // Keep x where the mask is set and y where it is clear.
    return _mm_or_si128(_mm_and_si128(xIsSmaller, x.fVec),
                        _mm_andnot_si128(xIsSmaller, y.fVec));
#else
    return _mm_min_epi32(x.fVec, y.fVec);
#endif
}
// Lane-wise signed maximum of x and y.
// SSE4.1 and later provide _mm_max_epi32 directly; earlier levels emulate it with a
// compare followed by a mask-based select.
AI static SkNx Max(const SkNx& x, const SkNx& y) {
#if SK_CPU_SSE_LEVEL < SK_CPU_SSE_LEVEL_SSE41
    // All-ones in every lane where x > y (written as y < x), zero elsewhere.
    const __m128i xIsLarger = _mm_cmplt_epi32(y.fVec, x.fVec);
    // Keep x where the mask is set and y where it is clear.
    return _mm_or_si128(_mm_and_si128(xIsLarger, x.fVec),
                        _mm_andnot_si128(xIsLarger, y.fVec));
#else
    return _mm_max_epi32(x.fVec, y.fVec);
#endif
}
__m128i fVec;
};

View File

@ -212,6 +212,25 @@ DEF_TEST(SkNx_abs, r) {
REPORTER_ASSERT(r, fs[3] == 4.0f);
}
// Checks Sk4i::abs() lane by lane on a zero, a small negative, a small positive and a
// large-magnitude negative input.
DEF_TEST(Sk4i_abs, r) {
// -2147483647 is INT32_MIN + 1, so its absolute value still fits in an int32 lane.
auto is = Sk4i(0, -1, 2, -2147483647).abs();
REPORTER_ASSERT(r, is[0] == 0);
REPORTER_ASSERT(r, is[1] == 1);
REPORTER_ASSERT(r, is[2] == 2);
REPORTER_ASSERT(r, is[3] == 2147483647);
}
// Checks Sk4i::Min and Sk4i::Max against SkTMin/SkTMax applied to each pair of lanes.
DEF_TEST(Sk4i_minmax, r) {
// The inputs mix lanes where a < b (lanes 0 and 3) with lanes where a > b (lanes 1 and 2),
// so both outcomes of the comparison are exercised.
auto a = Sk4i(0, 2, 4, 6);
auto b = Sk4i(1, 1, 3, 7);
auto min = Sk4i::Min(a, b);
auto max = Sk4i::Max(a, b);
// Each vector result must agree with the per-lane reference value.
for(int i = 0; i < 4; ++i) {
REPORTER_ASSERT(r, min[i] == SkTMin(a[i], b[i]));
REPORTER_ASSERT(r, max[i] == SkTMax(a[i], b[i]));
}
}
DEF_TEST(SkNx_floor, r) {
auto fs = Sk4f(0.4f, -0.4f, 0.6f, -0.6f).floor();
REPORTER_ASSERT(r, fs[0] == 0.0f);