Add sqrt() and rsqrt() to Sk4f.

This doesn't add them to the second-stringer Sk4i.  It's unclear we should be
doing that often, and we don't have efficient ways to do it except via floats.

BUG=skia:

Review URL: https://codereview.chromium.org/964603002
This commit is contained in:
mtklein 2015-02-26 12:48:05 -08:00 committed by Commit bot
parent 2719552fb1
commit 24aa0f0679
4 changed files with 34 additions and 0 deletions

View File

@ -50,6 +50,9 @@ public:
// Arithmetic on all four lanes at once.
Sk4x multiply(const Sk4x&) const;
Sk4x divide(const Sk4x&) const;
// Square-root helpers.  Implemented for Sk4f only; there is no Sk4i version.
Sk4x rsqrt() const; // Approximate reciprocal sqrt(), i.e. roughly 1/sqrt(x) per lane.
Sk4x sqrt() const; // this->multiply(this->rsqrt()) may be faster, but less precise.
// Comparisons against another vector; the result comes back as an Sk4i.
// (In the portable implementation each lane is -1 when the comparison holds, 0 otherwise.)
Sk4i equal(const Sk4x&) const;
Sk4i notEqual(const Sk4x&) const;
Sk4i lessThan(const Sk4x&) const;

View File

@ -2,6 +2,8 @@
// This file will be intentionally included three times.
#if defined(SK4X_PREAMBLE)
#include "SkFloatingPoint.h"
#include <math.h>
#elif defined(SK4X_PRIVATE)
typedef T Type;
@ -60,6 +62,20 @@ M(Sk4x<T>) multiply(const Sk4x<T>& other) const { return Sk4x(BINOP(*)); }
// Portable division: builds a new Sk4x from BINOP(/).
// NOTE(review): BINOP is defined above this hunk; presumably it expands to the four
// per-lane "fVec[i] / other.fVec[i]" expressions, like BOOL_BINOP below -- confirm.
M(Sk4x<T>) divide(const Sk4x<T>& other) const { return Sk4x(BINOP(/)); }
// BINOP is only needed by the arithmetic operators above, so retire it here.
#undef BINOP
// Portable fallback for the approximate reciprocal square root.
// Each lane is passed through sk_float_rsqrt() independently, and the four
// results are packed back into a fresh Sk4f in the same lane order.
template<> inline Sk4f Sk4f::rsqrt() const {
    const float lane0 = sk_float_rsqrt(fVec[0]);
    const float lane1 = sk_float_rsqrt(fVec[1]);
    const float lane2 = sk_float_rsqrt(fVec[2]);
    const float lane3 = sk_float_rsqrt(fVec[3]);
    return Sk4f(lane0, lane1, lane2, lane3);
}
// Portable fallback for the square root.
// Each lane goes through the C library's sqrtf(), and the four results are
// packed back into a fresh Sk4f in the same lane order.
template<> inline Sk4f Sk4f::sqrt() const {
    const float lane0 = sqrtf(fVec[0]);
    const float lane1 = sqrtf(fVec[1]);
    const float lane2 = sqrtf(fVec[2]);
    const float lane3 = sqrtf(fVec[3]);
    return Sk4f(lane0, lane1, lane2, lane3);
}
#define BOOL_BINOP(op) fVec[0] op other.fVec[0] ? -1 : 0, \
fVec[1] op other.fVec[1] ? -1 : 0, \
fVec[2] op other.fVec[2] ? -1 : 0, \

View File

@ -99,6 +99,9 @@ M(Sk4f) subtract(const Sk4f& o) const { return _mm_sub_ps(fVec, o.fVec); }
// SSE arithmetic: each method forwards to the matching packed-single intrinsic.
M(Sk4f) multiply(const Sk4f& o) const { return _mm_mul_ps(fVec, o.fVec); }
M(Sk4f) divide (const Sk4f& o) const { return _mm_div_ps(fVec, o.fVec); }
// rsqrt() uses the approximate reciprocal-sqrt instruction; sqrt() uses the real square root.
M(Sk4f) rsqrt() const { return _mm_rsqrt_ps(fVec); }
M(Sk4f) sqrt() const { return _mm_sqrt_ps( fVec); }
// SSE comparisons: the packed compare result is returned as an Sk4i.
M(Sk4i) equal (const Sk4f& o) const { return _mm_cmpeq_ps (fVec, o.fVec); }
M(Sk4i) notEqual (const Sk4f& o) const { return _mm_cmpneq_ps(fVec, o.fVec); }
M(Sk4i) lessThan (const Sk4f& o) const { return _mm_cmplt_ps (fVec, o.fVec); }

View File

@ -87,6 +87,18 @@ DEF_TEST(Sk4x_ImplicitPromotion, r) {
ASSERT_EQ(Sk4f(2,4,6,8), Sk4f(1,2,3,4).multiply(2.0f));
}
// Exercises Sk4f::sqrt() and Sk4f::rsqrt() on four perfect squares.
//   - sqrt() must reproduce the known roots exactly.
//   - x * rsqrt(x) must land within 0.01 of sqrt(x).
DEF_TEST(Sk4x_Sqrt, r) {
    Sk4f squares(4, 16, 25, 121),
         roots(2, 4, 5, 11);
    // .sqrt() should be pretty precise.
    ASSERT_EQ(roots, squares.sqrt());
    // .rsqrt() isn't so precise, but should be pretty close.  It is only an
    // approximation, so the result may land slightly below, slightly above, or
    // exactly on the true root depending on the backend.  Accept a small error
    // of either sign rather than requiring a strictly positive one.
    Sk4f error = roots.subtract(squares.multiply(squares.rsqrt()));
    REPORTER_ASSERT(r, error.greaterThan(-0.01f).allTrue());
    REPORTER_ASSERT(r, error.lessThan(0.01f).allTrue());
}
DEF_TEST(Sk4x_Comparison, r) {
ASSERT_EQ(Sk4f(1,2,3,4), Sk4f(1,2,3,4));
ASSERT_NE(Sk4f(4,3,2,1), Sk4f(1,2,3,4));