diff --git a/include/private/SkFloatingPoint.h b/include/private/SkFloatingPoint.h index 984a436319..adac6897c0 100644 --- a/include/private/SkFloatingPoint.h +++ b/include/private/SkFloatingPoint.h @@ -13,6 +13,7 @@ #include "include/private/SkSafe_math.h" #include #include +#include #include #include @@ -248,4 +249,12 @@ static inline float sk_ieee_double_divide_TODO_IS_DIVIDE_BY_ZERO_SAFE_HERE(doubl return sk_ieee_double_divide(n,d); } +static inline float sk_fmaf(float f, float m, float a) { +#if defined(FP_FAST_FMA) + return std::fmaf(f,m,a); +#else + return f*m+a; +#endif +} + #endif diff --git a/src/core/SkCubicMap.cpp b/src/core/SkCubicMap.cpp index b6366d66ae..30cd3b4bed 100644 --- a/src/core/SkCubicMap.cpp +++ b/src/core/SkCubicMap.cpp @@ -7,6 +7,7 @@ #include "include/core/SkCubicMap.h" #include "include/private/SkNx.h" +#include "src/core/SkOpts.h" //#define CUBICMAP_TRACK_MAX_ERROR @@ -14,126 +15,15 @@ #include "src/pathops/SkPathOpsCubic.h" #endif -static float eval_poly3(float a, float b, float c, float d, float t) { - return ((a * t + b) * t + c) * t + d; -} - -static float eval_poly2(float a, float b, float c, float t) { - return (a * t + b) * t + c; -} - -static float eval_poly1(float a, float b, float t) { - return a * t + b; -} - -static float guess_nice_cubic_root(float A, float B, float C, float D) { - return -D; -} - -#ifdef SK_DEBUG -static bool valid(float r) { - return r >= 0 && r <= 1; -} -#endif - static inline bool nearly_zero(SkScalar x) { SkASSERT(x >= 0); return x <= 0.0000000001f; } -static inline bool delta_nearly_zero(float delta) { - return sk_float_abs(delta) <= 0.0001f; -} - #ifdef CUBICMAP_TRACK_MAX_ERROR static int max_iters; #endif -/* - * TODO: will this be faster if we algebraically compute the polynomials for the numer and denom - * rather than compute them in parts? - */ -static float solve_nice_cubic_halley(float A, float B, float C, float D) { - const int MAX_ITERS = 8; - const float A3 = 3 * A; - const float B2 = B + B; - - float t = guess_nice_cubic_root(A, B, C, D); - int iters = 0; - for (; iters < MAX_ITERS; ++iters) { - float f = eval_poly3(A, B, C, D, t); // f = At^3 + Bt^2 + Ct + D - float fp = eval_poly2(A3, B2, C, t); // f' = 3At^2 + 2Bt + C - float fpp = eval_poly1(A3 + A3, B2, t); // f'' = 6At + 2B - - float numer = 2 * fp * f; - if (numer == 0) { - break; - } - float denom = 2 * fp * fp - f * fpp; - float delta = numer / denom; -// SkDebugf("[%d] delta %g t %g\n", iters, delta, t); - if (delta_nearly_zero(delta)) { - break; - } - float new_t = t - delta; - SkASSERT(valid(new_t)); - t = new_t; - } - SkASSERT(valid(t)); -#ifdef CUBICMAP_TRACK_MAX_ERROR - if (iters > max_iters) { - max_iters = iters; - SkDebugf("max_iters %d\n", max_iters); - } -#endif - return t; -} - -// At the moment, this technique does not appear to be better (i.e. faster at same precision) -// but the code is left here (at least for a while) to document the attempt. -static float solve_nice_cubic_householder(float A, float B, float C, float D) { - const int MAX_ITERS = 8; - const float A3 = 3 * A; - const float B2 = B + B; - - float t = guess_nice_cubic_root(A, B, C, D); - int iters = 0; - for (; iters < MAX_ITERS; ++iters) { - float f = eval_poly3(A, B, C, D, t); // f = At^3 + Bt^2 + Ct + D - float fp = eval_poly2(A3, B2, C, t); // f' = 3At^2 + 2Bt + C - float fpp = eval_poly1(A3 + A3, B2, t); // f'' = 6At + 2B - float fppp = A3 + A3; // f''' = 6A - - float f2 = f * f; - float fp2 = fp * fp; - -// float numer = 6 * f * fp * fp - 3 * f * f * fpp; -// float denom = 6 * fp * fp * fp - 6 * f * fp * fpp + f * f * fppp; - - float numer = 6 * f * fp2 - 3 * f2 * fpp; - if (numer == 0) { - break; - } - float denom = 6 * (fp2 * fp - f * fp * fpp) + f2 * fppp; - float delta = numer / denom; - // SkDebugf("[%d] delta %g t %g\n", iters, delta, t); - if (delta_nearly_zero(delta)) { - break; - } - float new_t = t - delta; - SkASSERT(valid(new_t)); - t = new_t; - } - SkASSERT(valid(t)); -#ifdef CUBICMAP_TRACK_MAX_ERROR - if (iters > max_iters) { - max_iters = iters; - SkDebugf("max_iters %d\n", max_iters); - } -#endif - return t; -} - #ifdef CUBICMAP_TRACK_MAX_ERROR static float compute_slow(float A, float B, float C, float x) { double roots[3]; @@ -149,9 +39,8 @@ static float compute_t_from_x(float A, float B, float C, float x) { #ifdef CUBICMAP_TRACK_MAX_ERROR float answer = compute_slow(A, B, C, x); #endif - float answer2 = true ? - solve_nice_cubic_halley(A, B, C, -x) : - solve_nice_cubic_householder(A, B, C, -x); + float answer2 = SkOpts::cubic_solver(A, B, C, -x); + #ifdef CUBICMAP_TRACK_MAX_ERROR float err = sk_float_abs(answer - answer2); if (err > max_err) { diff --git a/src/core/SkCubicSolver.h b/src/core/SkCubicSolver.h new file mode 100644 index 0000000000..f5bd9fef62 --- /dev/null +++ b/src/core/SkCubicSolver.h @@ -0,0 +1,71 @@ +/* + * Copyright 2018 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkCubicSolver_DEFINED +#define SkCubicSolver_DEFINED + +#include "include/core/SkTypes.h" +#include "include/private/SkFloatingPoint.h" + +//#define CUBICMAP_TRACK_MAX_ERROR + +namespace SK_OPTS_NS { + + static float eval_poly(float t, float b) { + return b; + } + + template + static float eval_poly(float t, float m, float b, Rest... rest) { + return eval_poly(t, sk_fmaf(m,t,b), rest...); + } + + inline float cubic_solver(float A, float B, float C, float D) { + #ifdef CUBICMAP_TRACK_MAX_ERROR + static int max_iters = 0; + #endif + + #ifdef SK_DEBUG + auto valid = [](float t) { + return t >= 0 && t <= 1; + }; + #endif + + auto guess_nice_cubic_root = [](float a, float b, float c, float d) { + return -d; + }; + float t = guess_nice_cubic_root(A, B, C, D); + + int iters = 0; + const int MAX_ITERS = 8; + for (; iters < MAX_ITERS; ++iters) { + SkASSERT(valid(t)); + float f = eval_poly(t, A,B,C,D); // f = At^3 + Bt^2 + Ct + D + if (sk_float_abs(f) <= 0.00005f) { + break; + } + float fp = eval_poly(t, 3*A, 2*B, C); // f' = 3At^2 + 2Bt + C + float fpp = eval_poly(t, 3*A+3*A, 2*B); // f'' = 6At + 2B + + float numer = 2 * fp * f; + float denom = sk_fmaf(2*fp, fp, -(f*fpp)); + + t -= numer / denom; + } + + #ifdef CUBICMAP_TRACK_MAX_ERROR + if (max_iters < iters) { + max_iters = iters; + SkDebugf("max_iters %d\n", max_iters); + } + #endif + SkASSERT(valid(t)); + return t; + } + +} // namespace SK_OPTS_NS +#endif diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp index 24deb783e3..ff330324a5 100644 --- a/src/core/SkOpts.cpp +++ b/src/core/SkOpts.cpp @@ -45,6 +45,8 @@ #include "src/opts/SkUtils_opts.h" #include "src/opts/SkXfermode_opts.h" +#include "src/core/SkCubicSolver.h" + namespace SkOpts { // Define default function pointer values here... // If our global compile options are set high enough, these defaults might even be @@ -77,6 +79,8 @@ namespace SkOpts { DEFINE_DEFAULT(rect_memset32); DEFINE_DEFAULT(rect_memset64); + DEFINE_DEFAULT(cubic_solver); + DEFINE_DEFAULT(hash_fn); DEFINE_DEFAULT(S32_alpha_D32_filter_DX); diff --git a/src/core/SkOpts.h b/src/core/SkOpts.h index 3a4d9504d2..309b55018e 100644 --- a/src/core/SkOpts.h +++ b/src/core/SkOpts.h @@ -52,6 +52,8 @@ namespace SkOpts { extern void (*rect_memset32)(uint32_t[], uint32_t, int, size_t, int); extern void (*rect_memset64)(uint64_t[], uint64_t, int, size_t, int); + extern float (*cubic_solver)(float, float, float, float); + // The fastest high quality 32-bit hash we can provide on this platform. extern uint32_t (*hash_fn)(const void*, size_t, uint32_t seed); static inline uint32_t hash(const void* data, size_t bytes, uint32_t seed=0) { diff --git a/src/opts/SkOpts_hsw.cpp b/src/opts/SkOpts_hsw.cpp index fae5c858cd..ade0af3f42 100644 --- a/src/opts/SkOpts_hsw.cpp +++ b/src/opts/SkOpts_hsw.cpp @@ -11,12 +11,15 @@ #include "src/opts/SkBlitRow_opts.h" #include "src/opts/SkRasterPipeline_opts.h" #include "src/opts/SkUtils_opts.h" +#include "src/core/SkCubicSolver.h" namespace SkOpts { void Init_hsw() { blit_row_color32 = hsw::blit_row_color32; blit_row_s32a_opaque = hsw::blit_row_s32a_opaque; + cubic_solver = SK_OPTS_NS::cubic_solver; + #define M(st) stages_highp[SkRasterPipeline::st] = (StageFn)SK_OPTS_NS::st; SK_RASTER_PIPELINE_STAGES(M) just_return_highp = (StageFn)SK_OPTS_NS::just_return;