diff --git a/include/private/SkFloatingPoint.h b/include/private/SkFloatingPoint.h index ffed5c0747..f7ee816b12 100644 --- a/include/private/SkFloatingPoint.h +++ b/include/private/SkFloatingPoint.h @@ -127,28 +127,20 @@ extern const uint32_t gIEEENegativeInfinity; #define SK_FloatInfinity (*SkTCast(&gIEEEInfinity)) #define SK_FloatNegativeInfinity (*SkTCast(&gIEEENegativeInfinity)) -static inline float sk_float_rsqrt_portable(float x) { - // Get initial estimate. - int i = *SkTCast(&x); - i = 0x5F1FFFF9 - (i>>1); - float estimate = *SkTCast(&i); - - // One step of Newton's method to refine. - const float estimate_sq = estimate*estimate; - estimate *= 0.703952253f*(2.38924456f-x*estimate_sq); - return estimate; -} +// We forward declare this to break an #include cycle. +// (SkScalar -> SkFloatingPoint -> SkOpts.h -> SkXfermode -> SkColor -> SkScalar) +namespace SkOpts { extern float (*rsqrt)(float); } // Fast, approximate inverse square root. // Compare to name-brand "1.0f / sk_float_sqrt(x)". Should be around 10x faster on SSE, 2x on NEON. -static inline float sk_float_rsqrt(float x) { +static inline float sk_float_rsqrt(const float x) { // We want all this inlined, so we'll inline SIMD and just take the hit when we don't know we've got // it at compile time. This is going to be too fast to productively hide behind a function pointer. // -// We do one step of Newton's method to refine the estimates in the NEON and portable paths. No +// We do one step of Newton's method to refine the estimates in the NEON and null paths. No // refinement is faster, but very innacurate. Two steps is more accurate, but slower than 1/sqrt. // -// Optimized constants in the portable path courtesy of http://rrrola.wz.cz/inv_sqrt.html +// Optimized constants in the null path courtesy of http://rrrola.wz.cz/inv_sqrt.html #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1 return _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(x))); #elif defined(SK_ARM_HAS_NEON) @@ -161,7 +153,8 @@ static inline float sk_float_rsqrt(float x) { estimate = vmul_f32(estimate, vrsqrts_f32(xx, estimate_sq)); return vget_lane_f32(estimate, 0); // 1 will work fine too; the answer's in both places. #else - return sk_float_rsqrt_portable(x); + // Perhaps runtime-detected NEON, or a portable fallback. + return SkOpts::rsqrt(x); #endif } diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp index 669401b417..28dd1afc71 100644 --- a/src/core/SkOpts.cpp +++ b/src/core/SkOpts.cpp @@ -13,6 +13,7 @@ #include "SkBlitRow_opts.h" #include "SkBlurImageFilter_opts.h" #include "SkColorCubeFilter_opts.h" +#include "SkFloatingPoint_opts.h" #include "SkMatrix_opts.h" #include "SkMorphologyImageFilter_opts.h" #include "SkSwizzler_opts.h" @@ -54,6 +55,7 @@ namespace SkOpts { // If our global compile options are set high enough, these defaults might even be // CPU-specialized, e.g. a typical x86-64 machine might start with SSE2 defaults. // They'll still get a chance to be replaced with even better ones, e.g. using SSE4.1. + decltype(rsqrt) rsqrt = sk_default::rsqrt; decltype(memset16) memset16 = sk_default::memset16; decltype(memset32) memset32 = sk_default::memset32; decltype(create_xfermode) create_xfermode = sk_default::create_xfermode; diff --git a/src/core/SkOpts.h b/src/core/SkOpts.h index 41ad8ebfe8..1a9820b417 100644 --- a/src/core/SkOpts.h +++ b/src/core/SkOpts.h @@ -23,6 +23,9 @@ namespace SkOpts { // Declare function pointers here... + // Returns a fast approximation of 1.0f/sqrtf(x). + extern float (*rsqrt)(float); + // See SkUtils.h extern void (*memset16)(uint16_t[], uint16_t, int); extern void (*memset32)(uint32_t[], uint32_t, int); diff --git a/src/opts/SkFloatingPoint_opts.h b/src/opts/SkFloatingPoint_opts.h new file mode 100644 index 0000000000..8b6536ad7f --- /dev/null +++ b/src/opts/SkFloatingPoint_opts.h @@ -0,0 +1,35 @@ +/* + * Copyright 2015 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkFloatingPoint_opts_DEFINED +#define SkFloatingPoint_opts_DEFINED + +#include "SkFloatingPoint.h" + +namespace SK_OPTS_NS { + +#if defined(SK_ARM_HAS_NEON) + static float rsqrt(float x) { + return sk_float_rsqrt(x); // This sk_float_rsqrt copy will take the NEON compile-time path. + } +#else + static float rsqrt(float x) { + // Get initial estimate. + int i = *SkTCast(&x); + i = 0x5F1FFFF9 - (i>>1); + float estimate = *SkTCast(&i); + + // One step of Newton's method to refine. + const float estimate_sq = estimate*estimate; + estimate *= 0.703952253f*(2.38924456f-x*estimate_sq); + return estimate; + } +#endif + +} // namespace SK_OPTS_NS + +#endif//SkFloatingPoint_opts_DEFINED diff --git a/src/opts/SkOpts_neon.cpp b/src/opts/SkOpts_neon.cpp index dcb057e1fe..9cff2298e4 100644 --- a/src/opts/SkOpts_neon.cpp +++ b/src/opts/SkOpts_neon.cpp @@ -12,6 +12,7 @@ #include "SkBlitRow_opts.h" #include "SkBlurImageFilter_opts.h" #include "SkColorCubeFilter_opts.h" +#include "SkFloatingPoint_opts.h" #include "SkMatrix_opts.h" #include "SkMorphologyImageFilter_opts.h" #include "SkSwizzler_opts.h" @@ -21,6 +22,7 @@ namespace SkOpts { void Init_neon() { + rsqrt = sk_neon::rsqrt; memset16 = sk_neon::memset16; memset32 = sk_neon::memset32; create_xfermode = sk_neon::create_xfermode; diff --git a/tests/MathTest.cpp b/tests/MathTest.cpp index de7ad1db78..24e46f3097 100644 --- a/tests/MathTest.cpp +++ b/tests/MathTest.cpp @@ -382,15 +382,14 @@ static void unittest_half(skiatest::Reporter* reporter) { } -template -static void test_rsqrt(skiatest::Reporter* reporter, RSqrtFn rsqrt) { +static void test_rsqrt(skiatest::Reporter* reporter) { const float maxRelativeError = 6.50196699e-4f; // test close to 0 up to 1 float input = 0.000001f; for (int i = 0; i < 1000; ++i) { float exact = 1.0f/sk_float_sqrt(input); - float estimate = rsqrt(input); + float estimate = sk_float_rsqrt(input); float relativeError = sk_float_abs(exact - estimate)/exact; REPORTER_ASSERT(reporter, relativeError <= maxRelativeError); input += 0.001f; @@ -400,7 +399,7 @@ static void test_rsqrt(skiatest::Reporter* reporter, RSqrtFn rsqrt) { input = 1.0f; for (int i = 0; i < 1000; ++i) { float exact = 1.0f/sk_float_sqrt(input); - float estimate = rsqrt(input); + float estimate = sk_float_rsqrt(input); float relativeError = sk_float_abs(exact - estimate)/exact; REPORTER_ASSERT(reporter, relativeError <= maxRelativeError); input += 0.01f; @@ -410,7 +409,7 @@ static void test_rsqrt(skiatest::Reporter* reporter, RSqrtFn rsqrt) { input = 1000000.0f; for (int i = 0; i < 100; ++i) { float exact = 1.0f/sk_float_sqrt(input); - float estimate = rsqrt(input); + float estimate = sk_float_rsqrt(input); float relativeError = sk_float_abs(exact - estimate)/exact; REPORTER_ASSERT(reporter, relativeError <= maxRelativeError); input += 754326.f; @@ -556,8 +555,7 @@ DEF_TEST(Math, reporter) { unittest_fastfloat(reporter); unittest_isfinite(reporter); unittest_half(reporter); - test_rsqrt(reporter, sk_float_rsqrt); - test_rsqrt(reporter, sk_float_rsqrt_portable); + test_rsqrt(reporter); for (i = 0; i < 10000; i++) { SkFixed numer = rand.nextS();