skia2/include/private/SkFloatingPoint.h

278 lines
9.5 KiB
C
Raw Normal View History

/*
Automatic update of all copyright notices to reflect new license terms. I have manually examined all of these diffs and restored a few files that seem to require manual adjustment. The following files still need to be modified manually, in a separate CL: android_sample/SampleApp/AndroidManifest.xml android_sample/SampleApp/res/layout/layout.xml android_sample/SampleApp/res/menu/sample.xml android_sample/SampleApp/res/values/strings.xml android_sample/SampleApp/src/com/skia/sampleapp/SampleApp.java android_sample/SampleApp/src/com/skia/sampleapp/SampleView.java experimental/CiCarbonSampleMain.c experimental/CocoaDebugger/main.m experimental/FileReaderApp/main.m experimental/SimpleCocoaApp/main.m experimental/iOSSampleApp/Shared/SkAlertPrompt.h experimental/iOSSampleApp/Shared/SkAlertPrompt.m experimental/iOSSampleApp/SkiOSSampleApp-Base.xcconfig experimental/iOSSampleApp/SkiOSSampleApp-Debug.xcconfig experimental/iOSSampleApp/SkiOSSampleApp-Release.xcconfig gpu/src/android/GrGLDefaultInterface_android.cpp gyp/common.gypi gyp_skia include/ports/SkHarfBuzzFont.h include/views/SkOSWindow_wxwidgets.h make.bat make.py src/opts/memset.arm.S src/opts/memset16_neon.S src/opts/memset32_neon.S src/opts/opts_check_arm.cpp src/ports/SkDebug_brew.cpp src/ports/SkMemory_brew.cpp src/ports/SkOSFile_brew.cpp src/ports/SkXMLParser_empty.cpp src/utils/ios/SkImageDecoder_iOS.mm src/utils/ios/SkOSFile_iOS.mm src/utils/ios/SkStream_NSData.mm tests/FillPathTest.cpp Review URL: http://codereview.appspot.com/4816058 git-svn-id: http://skia.googlecode.com/svn/trunk@1982 2bbb7eff-a529-9590-31e7-b0007b416f81
2011-07-28 14:26:00 +00:00
* Copyright 2006 The Android Open Source Project
*
Automatic update of all copyright notices to reflect new license terms. I have manually examined all of these diffs and restored a few files that seem to require manual adjustment. The following files still need to be modified manually, in a separate CL: android_sample/SampleApp/AndroidManifest.xml android_sample/SampleApp/res/layout/layout.xml android_sample/SampleApp/res/menu/sample.xml android_sample/SampleApp/res/values/strings.xml android_sample/SampleApp/src/com/skia/sampleapp/SampleApp.java android_sample/SampleApp/src/com/skia/sampleapp/SampleView.java experimental/CiCarbonSampleMain.c experimental/CocoaDebugger/main.m experimental/FileReaderApp/main.m experimental/SimpleCocoaApp/main.m experimental/iOSSampleApp/Shared/SkAlertPrompt.h experimental/iOSSampleApp/Shared/SkAlertPrompt.m experimental/iOSSampleApp/SkiOSSampleApp-Base.xcconfig experimental/iOSSampleApp/SkiOSSampleApp-Debug.xcconfig experimental/iOSSampleApp/SkiOSSampleApp-Release.xcconfig gpu/src/android/GrGLDefaultInterface_android.cpp gyp/common.gypi gyp_skia include/ports/SkHarfBuzzFont.h include/views/SkOSWindow_wxwidgets.h make.bat make.py src/opts/memset.arm.S src/opts/memset16_neon.S src/opts/memset32_neon.S src/opts/opts_check_arm.cpp src/ports/SkDebug_brew.cpp src/ports/SkMemory_brew.cpp src/ports/SkOSFile_brew.cpp src/ports/SkXMLParser_empty.cpp src/utils/ios/SkImageDecoder_iOS.mm src/utils/ios/SkOSFile_iOS.mm src/utils/ios/SkStream_NSData.mm tests/FillPathTest.cpp Review URL: http://codereview.appspot.com/4816058 git-svn-id: http://skia.googlecode.com/svn/trunk@1982 2bbb7eff-a529-9590-31e7-b0007b416f81
2011-07-28 14:26:00 +00:00
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef SkFloatingPoint_DEFINED
#define SkFloatingPoint_DEFINED
#include "include/core/SkTypes.h"
#include "include/private/SkFloatBits.h"
#include "include/private/SkSafe_math.h"
#include <float.h>
#include <math.h>
// [blame annotation, 2019-07-10] "Move test around in cubic_solver to test function, not delta_t.
// Add SkOpts variant for avx2 to get FMA. Decrease tolerance now that we're testing the function."
// The cubicmap benchmark table from the commit message is omitted here; see
// https://skia-review.googlesource.com/c/skia/+/226499
// (Change-Id: I260391be956d31a5cf3d0367d1285e56af7568f8).
#include <cmath>
#include <cstring>
#include <limits>
#if defined(SK_LEGACY_FLOAT_RSQRT)
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1
#include <xmmintrin.h>
#elif defined(SK_ARM_HAS_NEON)
#include <arm_neon.h>
#endif
#endif
// For _POSIX_VERSION
#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
#include <unistd.h>
#endif
// Compile-time numeric constants.
// sqrt(2) as a float literal.
constexpr float SK_FloatSqrt2 = 1.41421356f;
// pi as a float literal and as a (longer) double literal.
constexpr float SK_FloatPI = 3.14159265f;
constexpr double SK_DoublePI = 3.14159265358979323846264338327950288;
// Single-precision power: returns `base` raised to `exp` by forwarding to powf().
//
// Historical note: C++98 <cmath> std::pow looked like the earliest portable float pow,
// but on Linux including <cmath> undefines isfinite
// (http://gcc.gnu.org/bugzilla/show_bug.cgi?id=14608).
static inline float sk_float_pow(float base, float exp) {
    const float raised = powf(base, exp);
    return raised;
}
// Thin sk_float_* aliases: each macro expands directly to the named single-precision
// C math call, so arguments are evaluated exactly once by that call.
#define sk_float_sqrt(x) sqrtf(x)
#define sk_float_sin(x) sinf(x)
#define sk_float_cos(x) cosf(x)
#define sk_float_tan(x) tanf(x)
#define sk_float_floor(x) floorf(x)
#define sk_float_ceil(x) ceilf(x)
#define sk_float_trunc(x) truncf(x)
// Mac builds call the unsuffixed acos/asin and cast the result to float; every other
// build uses acosf/asinf. (The reason for the Mac special case is not recorded here.)
#ifdef SK_BUILD_FOR_MAC
# define sk_float_acos(x) static_cast<float>(acos(x))
# define sk_float_asin(x) static_cast<float>(asin(x))
#else
# define sk_float_acos(x) acosf(x)
# define sk_float_asin(x) asinf(x)
#endif
#define sk_float_atan2(y,x) atan2f(y,x)
#define sk_float_abs(x) fabsf(x)
#define sk_float_copysign(x, y) copysignf(x, y)
#define sk_float_mod(x,y) fmodf(x,y)
#define sk_float_exp(x) expf(x)
#define sk_float_log(x) logf(x)
// Converts an angle in degrees to radians by scaling with (SK_FloatPI / 180).
constexpr float sk_float_degrees_to_radians(float degrees) {
    return (SK_FloatPI / 180) * degrees;
}
// Converts an angle in radians to degrees by scaling with (180 / SK_FloatPI).
constexpr float sk_float_radians_to_degrees(float radians) {
    return (180 / SK_FloatPI) * radians;
}
// Rounds by computing floor(x + 0.5f), so exact .5 ties move toward +infinity
// (e.g. -2.5f becomes -2.0f). The result is still a float.
#define sk_float_round(x) sk_float_floor((x) + 0.5f)
// Base-2 logarithm of a float.
// log2f could not be found on Android when this was written (possibly just a tool bug),
// so Android builds derive the value from the natural logarithm; all other builds
// map straight to log2f.
#ifdef SK_BUILD_FOR_ANDROID
static inline float sk_float_log2(float x) {
    // 1 / ln(2): scales a natural logarithm into a base-2 logarithm.
    const double kInvLn2 = 1.44269504088896;
    const double log2_of_x = log(x) * kInvLn2;
    return (float)log2_of_x;
}
#else
#define sk_float_log2(x) log2f(x)
#endif
// Reports whether x is finite by extracting its bit pattern with SkFloat2Bits and
// handing that to SkFloatBits_IsFinite.
static inline bool sk_float_isfinite(float x) {
    const int32_t bits = SkFloat2Bits(x);
    return SkFloatBits_IsFinite(bits);
}
// True only when both a and b pass sk_float_isfinite. b is not examined when a fails.
static inline bool sk_floats_are_finite(float a, float b) {
    if (!sk_float_isfinite(a)) {
        return false;
    }
    return sk_float_isfinite(b);
}
// Checks `count` floats for finiteness without branching on each element.
// A running product seeded with 0 stays 0 while every element is finite; multiplying
// 0 by an infinity or by NaN produces NaN, which then sticks for the rest of the loop.
// Returns true when count is 0 (the loop never runs).
static inline bool sk_floats_are_finite(const float array[], int count) {
    float accum = 0;
    const float* cursor = array;
    for (int remaining = count; remaining > 0; --remaining) {
        accum *= *cursor++;
    }
    // accum is now either 0 or NaN; NaN compares unequal to everything, including 0.
    return accum == 0;
}
// Reports whether x is an infinity by extracting its bit pattern with SkFloat2Bits and
// handing that to SkFloatBits_IsInf.
static inline bool sk_float_isinf(float x) {
    const int32_t bits = SkFloat2Bits(x);
    return SkFloatBits_IsInf(bits);
}
// NaN test: NaN is the only float value that compares unequal to itself.
static inline bool sk_float_isnan(float x) {
    return x != x;
}
// NaN test for doubles. The comparison is done in double precision: NaN is the only
// value that compares unequal to itself. Comparing directly (rather than narrowing the
// argument to float first) keeps large finite doubles such as 1e300 from going through
// an out-of-range double->float conversion just to answer the question.
static inline bool sk_double_isnan(double a) { return a != a; }
// Clamp limits used by the saturating float->integer conversions.
// 2147483520 is 2^31 - 128; the Min variant is simply its negation.
#define SK_MaxS32FitsInFloat 2147483520
#define SK_MinS32FitsInFloat -SK_MaxS32FitsInFloat
// SK_MaxS64 with its low (63-24) = 39 bits cleared; the Min variant is its negation.
#define SK_MaxS64FitsInFloat (SK_MaxS64 >> (63-24) << (63-24)) // 0x7fffff8000000000
#define SK_MinS64FitsInFloat -SK_MaxS64FitsInFloat
/**
 *  Clamps x into [SK_MinS32FitsInFloat, SK_MaxS32FitsInFloat] and then converts it with
 *  an (int) cast. NaN yields SK_MaxS32FitsInFloat.
 */
static inline int sk_float_saturate2int(float x) {
    // Negated comparisons: NaN fails "x < max", so it is replaced by the upper limit.
    if (!(x < SK_MaxS32FitsInFloat)) {
        x = SK_MaxS32FitsInFloat;
    }
    if (!(x > SK_MinS32FitsInFloat)) {
        x = SK_MinS32FitsInFloat;
    }
    return (int)x;
}
/**
 *  Clamps x into [SK_MinS32, SK_MaxS32] and then converts it with an (int) cast.
 *  NaN yields SK_MaxS32.
 */
static inline int sk_double_saturate2int(double x) {
    // Negated comparisons: NaN fails "x < max", so it is replaced by the upper limit.
    if (!(x < SK_MaxS32)) {
        x = SK_MaxS32;
    }
    if (!(x > SK_MinS32)) {
        x = SK_MinS32;
    }
    return (int)x;
}
/**
 *  Clamps x into [SK_MinS64FitsInFloat, SK_MaxS64FitsInFloat] and then converts it with
 *  an (int64_t) cast. NaN yields SK_MaxS64FitsInFloat.
 */
static inline int64_t sk_float_saturate2int64(float x) {
    // Negated comparisons: NaN fails "x < max", so it is replaced by the upper limit.
    if (!(x < SK_MaxS64FitsInFloat)) {
        x = SK_MaxS64FitsInFloat;
    }
    if (!(x > SK_MinS64FitsInFloat)) {
        x = SK_MinS64FitsInFloat;
    }
    return (int64_t)x;
}
// float -> int with floor / round / ceil, clamped through sk_float_saturate2int.
// "round" is floor(x + 0.5f).
#define sk_float_floor2int(x) sk_float_saturate2int(sk_float_floor(x))
#define sk_float_round2int(x) sk_float_saturate2int(sk_float_floor((x) + 0.5f))
#define sk_float_ceil2int(x) sk_float_saturate2int(sk_float_ceil(x))
// Same three conversions using a plain (int) cast with no clamping; the caller is
// responsible for passing values that fit in an int.
#define sk_float_floor2int_no_saturate(x) (int)sk_float_floor(x)
#define sk_float_round2int_no_saturate(x) (int)sk_float_floor((x) + 0.5f)
#define sk_float_ceil2int_no_saturate(x) (int)sk_float_ceil(x)
// double floor / round / ceil; "round" is floor(x + 0.5). Results stay double.
#define sk_double_floor(x) floor(x)
#define sk_double_round(x) floor((x) + 0.5)
#define sk_double_ceil(x) ceil(x)
// double -> int via a plain (int) cast; note these do NOT go through
// sk_double_saturate2int, so no clamping is applied.
#define sk_double_floor2int(x) (int)floor(x)
#define sk_double_round2int(x) (int)floor((x) + 0.5)
#define sk_double_ceil2int(x) (int)ceil(x)
// Narrows a double to float, with the float-cast-overflow sanitizer check disabled for
// this function. Clang treats a too-large finite value as undefined here, but the
// conversion is really implementation-defined: the result is one of the two bracketing
// representable floats (the largest float or infinity), which is good enough for callers.
SK_ATTRIBUTE(no_sanitize("float-cast-overflow"))
static inline float sk_double_to_float(double x) {
    const float narrowed = static_cast<float>(x);
    return narrowed;
}
// Special floating-point values taken from std::numeric_limits: quiet NaN and +/-infinity
// for float, and quiet NaN for double.
#define SK_FloatNaN std::numeric_limits<float>::quiet_NaN()
#define SK_FloatInfinity (+std::numeric_limits<float>::infinity())
#define SK_FloatNegativeInfinity (-std::numeric_limits<float>::infinity())
#define SK_DoubleNaN std::numeric_limits<double>::quiet_NaN()
// Checks whether every one of the `count` floats in `array` lies in the unit interval.
// Returns false if any of the floats are outside of [0...1].
// Returns true if count is 0.
// Only declared here; the definition is not part of this header.
bool sk_floats_are_unit(const float array[], size_t count);
#if defined(SK_LEGACY_FLOAT_RSQRT)
// Portable approximation of 1/sqrt(x): an integer-domain initial guess built from the
// float's bit pattern, followed by a single refinement step with tuned constants.
static inline float sk_float_rsqrt_portable(float x) {
    // Initial guess: copy the float's 4 bytes into an int and apply the shift/subtract.
    int bits;
    memcpy(&bits, &x, 4);
    bits = 0x5F1FFFF9 - (bits >> 1);
    float guess;
    memcpy(&guess, &bits, 4);
    // One step of Newton's method to tighten the guess.
    const float guess_sq = guess * guess;
    const float correction = 0.703952253f * (2.38924456f - x * guess_sq);
    return guess * correction;
}
// Fast, approximate inverse square root.
// Compare to name-brand "1.0f / sk_float_sqrt(x)". Should be around 10x faster on SSE, 2x on NEON.
//
// Three compile-time paths: the SSE reciprocal-sqrt instruction (used as-is, no refinement),
// a NEON estimate plus one refinement step, or sk_float_rsqrt_portable() when neither
// instruction set is known to be available.
static inline float sk_float_rsqrt(float x) {
// We want all this inlined, so we'll inline SIMD and just take the hit when we don't know we've got
// it at compile time. This is going to be too fast to productively hide behind a function pointer.
//
// We do one step of Newton's method to refine the estimates in the NEON and portable paths. No
// refinement is faster, but very inaccurate. Two steps is more accurate, but slower than 1/sqrt.
//
// Optimized constants in the portable path courtesy of http://rrrola.wz.cz/inv_sqrt.html
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1
// Load x into the low lane, take the hardware estimate, and read the low lane back out.
return _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(x)));
#elif defined(SK_ARM_HAS_NEON)
// Get initial estimate.
const float32x2_t xx = vdup_n_f32(x); // Clever readers will note we're doing everything 2x.
float32x2_t estimate = vrsqrte_f32(xx);
// One step of Newton's method to refine.
const float32x2_t estimate_sq = vmul_f32(estimate, estimate);
estimate = vmul_f32(estimate, vrsqrts_f32(xx, estimate_sq));
return vget_lane_f32(estimate, 0); // 1 will work fine too; the answer's in both places.
#else
return sk_float_rsqrt_portable(x);
#endif
}
#else
// Non-legacy build: both reciprocal-square-root entry points are the exact expression
// 1.0f / sk_float_sqrt(x), with no approximation.
static inline float sk_float_rsqrt_portable(float x) {
    const float root = sk_float_sqrt(x);
    return 1.0f / root;
}
static inline float sk_float_rsqrt(float x) {
    const float root = sk_float_sqrt(x);
    return 1.0f / root;
}
#endif
// Returns the log2 of the provided value, were that value to be rounded up to the next
// power of 2. Returns 0 if value <= 0, and never returns a negative number, even if
// value is NaN.
//
//     sk_float_nextlog2((-inf..1]) -> 0
//     sk_float_nextlog2((1..2])    -> 1
//     sk_float_nextlog2((2..4])    -> 2
//     sk_float_nextlog2((4..8])    -> 3
//     ...
static inline int sk_float_nextlog2(float x) {
    const uint32_t raw = (uint32_t)SkFloat2Bits(x);
    // Adding (2^23 - 1) carries into the exponent field whenever any mantissa bit is set,
    // i.e. it bumps the exponent for every value that is not an exact power of 2.
    const uint32_t bumped = raw + ((1u << 23) - 1u);
    // Shift out the 23 mantissa bits (sign-extending) and remove the exponent bias.
    const int exp = ((int32_t)bumped >> 23) - 127;
    // All-zero mask when exp is negative, all-ones otherwise: clamps negatives to 0.
    // This covers negative or denormalized floats and exponents < 0.
    const int keep = ~(exp >> 31);
    return exp & keep;
}
// This is the number of significant digits we can print in a string such that when we read that
// string back we get the floating point number we expect. The minimum value C requires is 6, but
// most compilers support 9.
// Uses FLT_DECIMAL_DIG when the toolchain's <float.h> defines it, and falls back to 9 otherwise.
#ifdef FLT_DECIMAL_DIG
#define SK_FLT_DECIMAL_DIG FLT_DECIMAL_DIG
#else
#define SK_FLT_DECIMAL_DIG 9
#endif
// Float division that is allowed to hit zero denominators and non-finite operands.
// IEEE defines the result of such divides, but C does not, so this helper carries a
// no_sanitize attribute that suppresses the possible undefined-behavior warnings.
SK_ATTRIBUTE(no_sanitize("float-divide-by-zero"))
static inline float sk_ieee_float_divide(float numer, float denom) {
    const float quotient = numer / denom;
    return quotient;
}
// Double-precision division with the float-divide-by-zero sanitizer check disabled, so
// zero denominators and non-finite operands simply produce the IEEE result.
SK_ATTRIBUTE(no_sanitize("float-divide-by-zero"))
static inline double sk_ieee_double_divide(double numer, double denom) {
    const double quotient = numer / denom;
    return quotient;
}
// Marker wrapper used while divide-by-zero call sites are being cleaned up: places that
// currently divide by zero call this so they are easy to find later. It forwards to
// sk_ieee_float_divide unchanged.
static inline float sk_ieee_float_divide_TODO_IS_DIVIDE_BY_ZERO_SAFE_HERE(float n, float d) {
    const float result = sk_ieee_float_divide(n, d);
    return result;
}
// [blame annotation, 2019-07-10] "Move test around in cubic_solver to test function, not delta_t.
// Add SkOpts variant for avx2 to get FMA. Decrease tolerance now that we're testing the function."
// The cubicmap benchmark table from the commit message is omitted here; see
// https://skia-review.googlesource.com/c/skia/+/226499
// (Change-Id: I260391be956d31a5cf3d0367d1285e56af7568f8).
// Computes f*m + a in single precision.
// When the platform advertises a fast float fused multiply-add (FP_FAST_FMAF, the macro
// that corresponds to fmaf; FP_FAST_FMA only describes the double-precision fma), the
// fused form is used and the product is not rounded before the add. Otherwise this is a
// plain multiply followed by an add, so results may differ in the last bit between the
// two paths.
static inline float sk_fmaf(float f, float m, float a) {
#if defined(FP_FAST_FMAF)
    return std::fmaf(f,m,a);
#else
    return f*m+a;
#endif
}
#endif