Move test around in cubic_solver to test function, not delta_t.
Add SkOpts variant for avx2 to get FMA Decrease tolerance now that we're testing the function Before 15/15 MB 1 1.13ms 1.17ms 1.18ms 1.26ms 4% cubicmap_0_1_1_1 15/15 MB 1 1.08ms 1.13ms 1.12ms 1.17ms 3% cubicmap_0_1_1_0 15/15 MB 1 862µs 904µs 900µs 937µs 3% cubicmap_0_1_0_1 15/15 MB 1 861µs 878µs 882µs 934µs 3% cubicmap_0_1_0_0 15/15 MB 1 1.44ms 1.47ms 1.49ms 1.55ms 3% cubicmap_1_0_1_1 15/15 MB 1 1.44ms 1.48ms 1.48ms 1.55ms 3% cubicmap_1_0_1_0 15/15 MB 1 1.42ms 1.42ms 1.46ms 1.53ms 3% cubicmap_1_0_0_1 15/15 MB 1 1.42ms 1.42ms 1.44ms 1.51ms 2% cubicmap_1_0_0_0 After moving the check to the function, not delta_t 15/15 MB 1 900µs 900µs 915µs 971µs 3% cubicmap_0_1_1_1 15/15 MB 1 899µs 900µs 914µs 988µs 3% cubicmap_0_1_1_0 15/15 MB 1 865µs 896µs 890µs 946µs 3% cubicmap_0_1_0_1 15/15 MB 1 866µs 910µs 914µs 959µs 3% cubicmap_0_1_0_0 15/15 MB 1 1.29ms 1.29ms 1.33ms 1.44ms 4% cubicmap_1_0_1_1 15/15 MB 1 1.28ms 1.29ms 1.34ms 1.54ms 6% cubicmap_1_0_1_0 15/15 MB 1 1.26ms 1.26ms 1.27ms 1.34ms 3% cubicmap_1_0_0_1 15/15 MB 1 1.26ms 1.26ms 1.27ms 1.3ms 2% cubicmap_1_0_0_0 After SkOpts (on an avx2 machine) 15/15 MB 1 613µs 613µs 616µs 646µs 2% cubicmap_0_1_1_1 15/15 MB 1 613µs 613µs 624µs 654µs 3% cubicmap_0_1_1_0 15/15 MB 1 862µs 865µs 867µs 887µs 1% cubicmap_0_1_0_1 15/15 MB 1 865µs 901µs 896µs 949µs 3% cubicmap_0_1_0_0 15/15 MB 1 849µs 850µs 868µs 929µs 4% cubicmap_1_0_1_1 15/15 MB 1 849µs 850µs 873µs 940µs 4% cubicmap_1_0_1_0 15/15 MB 1 831µs 831µs 856µs 950µs 5% cubicmap_1_0_0_1 15/15 MB 1 831µs 831µs 848µs 911µs 3% cubicmap_1_0_0_0 (not checked in) if we also enable the pragma in cubic_solver 15/15 MB 1 593µs 594µs 597µs 623µs 2% cubicmap_0_1_1_1 15/15 MB 1 593µs 595µs 605µs 629µs 2% cubicmap_0_1_1_0 15/15 MB 1 864µs 867µs 869µs 890µs 1% cubicmap_0_1_0_1 15/15 MB 1 864µs 866µs 886µs 950µs 4% cubicmap_0_1_0_0 15/15 MB 1 809µs 831µs 841µs 891µs 4% cubicmap_1_0_1_1 15/15 MB 1 809µs 810µs 855µs 1.11ms 11% cubicmap_1_0_1_0 15/15 MB 1 794µs 861µs 856µs 914µs 4% cubicmap_1_0_0_1 15/15 MB 1 794µs 821µs 818µs 853µs 3% cubicmap_1_0_0_0 Change-Id: I260391be956d31a5cf3d0367d1285e56af7568f8 Reviewed-on: https://skia-review.googlesource.com/c/skia/+/226499 Reviewed-by: Mike Reed <reed@google.com> Reviewed-by: Mike Klein <mtklein@google.com> Commit-Queue: Mike Reed <reed@google.com> Auto-Submit: Mike Reed <reed@google.com>
This commit is contained in:
parent
cc32642882
commit
3a8ff230ab
@ -13,6 +13,7 @@
|
||||
#include "include/private/SkSafe_math.h"
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
|
||||
@ -248,4 +249,12 @@ static inline float sk_ieee_double_divide_TODO_IS_DIVIDE_BY_ZERO_SAFE_HERE(doubl
|
||||
return sk_ieee_double_divide(n,d);
|
||||
}
|
||||
|
||||
static inline float sk_fmaf(float f, float m, float a) {
|
||||
#if defined(FP_FAST_FMA)
|
||||
return std::fmaf(f,m,a);
|
||||
#else
|
||||
return f*m+a;
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -7,6 +7,7 @@
|
||||
|
||||
#include "include/core/SkCubicMap.h"
|
||||
#include "include/private/SkNx.h"
|
||||
#include "src/core/SkOpts.h"
|
||||
|
||||
//#define CUBICMAP_TRACK_MAX_ERROR
|
||||
|
||||
@ -14,126 +15,15 @@
|
||||
#include "src/pathops/SkPathOpsCubic.h"
|
||||
#endif
|
||||
|
||||
static float eval_poly3(float a, float b, float c, float d, float t) {
|
||||
return ((a * t + b) * t + c) * t + d;
|
||||
}
|
||||
|
||||
static float eval_poly2(float a, float b, float c, float t) {
|
||||
return (a * t + b) * t + c;
|
||||
}
|
||||
|
||||
static float eval_poly1(float a, float b, float t) {
|
||||
return a * t + b;
|
||||
}
|
||||
|
||||
static float guess_nice_cubic_root(float A, float B, float C, float D) {
|
||||
return -D;
|
||||
}
|
||||
|
||||
#ifdef SK_DEBUG
|
||||
static bool valid(float r) {
|
||||
return r >= 0 && r <= 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline bool nearly_zero(SkScalar x) {
|
||||
SkASSERT(x >= 0);
|
||||
return x <= 0.0000000001f;
|
||||
}
|
||||
|
||||
static inline bool delta_nearly_zero(float delta) {
|
||||
return sk_float_abs(delta) <= 0.0001f;
|
||||
}
|
||||
|
||||
#ifdef CUBICMAP_TRACK_MAX_ERROR
|
||||
static int max_iters;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* TODO: will this be faster if we algebraically compute the polynomials for the numer and denom
|
||||
* rather than compute them in parts?
|
||||
*/
|
||||
static float solve_nice_cubic_halley(float A, float B, float C, float D) {
|
||||
const int MAX_ITERS = 8;
|
||||
const float A3 = 3 * A;
|
||||
const float B2 = B + B;
|
||||
|
||||
float t = guess_nice_cubic_root(A, B, C, D);
|
||||
int iters = 0;
|
||||
for (; iters < MAX_ITERS; ++iters) {
|
||||
float f = eval_poly3(A, B, C, D, t); // f = At^3 + Bt^2 + Ct + D
|
||||
float fp = eval_poly2(A3, B2, C, t); // f' = 3At^2 + 2Bt + C
|
||||
float fpp = eval_poly1(A3 + A3, B2, t); // f'' = 6At + 2B
|
||||
|
||||
float numer = 2 * fp * f;
|
||||
if (numer == 0) {
|
||||
break;
|
||||
}
|
||||
float denom = 2 * fp * fp - f * fpp;
|
||||
float delta = numer / denom;
|
||||
// SkDebugf("[%d] delta %g t %g\n", iters, delta, t);
|
||||
if (delta_nearly_zero(delta)) {
|
||||
break;
|
||||
}
|
||||
float new_t = t - delta;
|
||||
SkASSERT(valid(new_t));
|
||||
t = new_t;
|
||||
}
|
||||
SkASSERT(valid(t));
|
||||
#ifdef CUBICMAP_TRACK_MAX_ERROR
|
||||
if (iters > max_iters) {
|
||||
max_iters = iters;
|
||||
SkDebugf("max_iters %d\n", max_iters);
|
||||
}
|
||||
#endif
|
||||
return t;
|
||||
}
|
||||
|
||||
// At the moment, this technique does not appear to be better (i.e. faster at same precision)
|
||||
// but the code is left here (at least for a while) to document the attempt.
|
||||
static float solve_nice_cubic_householder(float A, float B, float C, float D) {
|
||||
const int MAX_ITERS = 8;
|
||||
const float A3 = 3 * A;
|
||||
const float B2 = B + B;
|
||||
|
||||
float t = guess_nice_cubic_root(A, B, C, D);
|
||||
int iters = 0;
|
||||
for (; iters < MAX_ITERS; ++iters) {
|
||||
float f = eval_poly3(A, B, C, D, t); // f = At^3 + Bt^2 + Ct + D
|
||||
float fp = eval_poly2(A3, B2, C, t); // f' = 3At^2 + 2Bt + C
|
||||
float fpp = eval_poly1(A3 + A3, B2, t); // f'' = 6At + 2B
|
||||
float fppp = A3 + A3; // f''' = 6A
|
||||
|
||||
float f2 = f * f;
|
||||
float fp2 = fp * fp;
|
||||
|
||||
// float numer = 6 * f * fp * fp - 3 * f * f * fpp;
|
||||
// float denom = 6 * fp * fp * fp - 6 * f * fp * fpp + f * f * fppp;
|
||||
|
||||
float numer = 6 * f * fp2 - 3 * f2 * fpp;
|
||||
if (numer == 0) {
|
||||
break;
|
||||
}
|
||||
float denom = 6 * (fp2 * fp - f * fp * fpp) + f2 * fppp;
|
||||
float delta = numer / denom;
|
||||
// SkDebugf("[%d] delta %g t %g\n", iters, delta, t);
|
||||
if (delta_nearly_zero(delta)) {
|
||||
break;
|
||||
}
|
||||
float new_t = t - delta;
|
||||
SkASSERT(valid(new_t));
|
||||
t = new_t;
|
||||
}
|
||||
SkASSERT(valid(t));
|
||||
#ifdef CUBICMAP_TRACK_MAX_ERROR
|
||||
if (iters > max_iters) {
|
||||
max_iters = iters;
|
||||
SkDebugf("max_iters %d\n", max_iters);
|
||||
}
|
||||
#endif
|
||||
return t;
|
||||
}
|
||||
|
||||
#ifdef CUBICMAP_TRACK_MAX_ERROR
|
||||
static float compute_slow(float A, float B, float C, float x) {
|
||||
double roots[3];
|
||||
@ -149,9 +39,8 @@ static float compute_t_from_x(float A, float B, float C, float x) {
|
||||
#ifdef CUBICMAP_TRACK_MAX_ERROR
|
||||
float answer = compute_slow(A, B, C, x);
|
||||
#endif
|
||||
float answer2 = true ?
|
||||
solve_nice_cubic_halley(A, B, C, -x) :
|
||||
solve_nice_cubic_householder(A, B, C, -x);
|
||||
float answer2 = SkOpts::cubic_solver(A, B, C, -x);
|
||||
|
||||
#ifdef CUBICMAP_TRACK_MAX_ERROR
|
||||
float err = sk_float_abs(answer - answer2);
|
||||
if (err > max_err) {
|
||||
|
71
src/core/SkCubicSolver.h
Normal file
71
src/core/SkCubicSolver.h
Normal file
@ -0,0 +1,71 @@
|
||||
/*
|
||||
* Copyright 2018 Google Inc.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license that can be
|
||||
* found in the LICENSE file.
|
||||
*/
|
||||
|
||||
#ifndef SkCubicSolver_DEFINED
|
||||
#define SkCubicSolver_DEFINED
|
||||
|
||||
#include "include/core/SkTypes.h"
|
||||
#include "include/private/SkFloatingPoint.h"
|
||||
|
||||
//#define CUBICMAP_TRACK_MAX_ERROR
|
||||
|
||||
namespace SK_OPTS_NS {
|
||||
|
||||
static float eval_poly(float t, float b) {
|
||||
return b;
|
||||
}
|
||||
|
||||
template <typename... Rest>
|
||||
static float eval_poly(float t, float m, float b, Rest... rest) {
|
||||
return eval_poly(t, sk_fmaf(m,t,b), rest...);
|
||||
}
|
||||
|
||||
inline float cubic_solver(float A, float B, float C, float D) {
|
||||
#ifdef CUBICMAP_TRACK_MAX_ERROR
|
||||
static int max_iters = 0;
|
||||
#endif
|
||||
|
||||
#ifdef SK_DEBUG
|
||||
auto valid = [](float t) {
|
||||
return t >= 0 && t <= 1;
|
||||
};
|
||||
#endif
|
||||
|
||||
auto guess_nice_cubic_root = [](float a, float b, float c, float d) {
|
||||
return -d;
|
||||
};
|
||||
float t = guess_nice_cubic_root(A, B, C, D);
|
||||
|
||||
int iters = 0;
|
||||
const int MAX_ITERS = 8;
|
||||
for (; iters < MAX_ITERS; ++iters) {
|
||||
SkASSERT(valid(t));
|
||||
float f = eval_poly(t, A,B,C,D); // f = At^3 + Bt^2 + Ct + D
|
||||
if (sk_float_abs(f) <= 0.00005f) {
|
||||
break;
|
||||
}
|
||||
float fp = eval_poly(t, 3*A, 2*B, C); // f' = 3At^2 + 2Bt + C
|
||||
float fpp = eval_poly(t, 3*A+3*A, 2*B); // f'' = 6At + 2B
|
||||
|
||||
float numer = 2 * fp * f;
|
||||
float denom = sk_fmaf(2*fp, fp, -(f*fpp));
|
||||
|
||||
t -= numer / denom;
|
||||
}
|
||||
|
||||
#ifdef CUBICMAP_TRACK_MAX_ERROR
|
||||
if (max_iters < iters) {
|
||||
max_iters = iters;
|
||||
SkDebugf("max_iters %d\n", max_iters);
|
||||
}
|
||||
#endif
|
||||
SkASSERT(valid(t));
|
||||
return t;
|
||||
}
|
||||
|
||||
} // namespace SK_OPTS_NS
|
||||
#endif
|
@ -45,6 +45,8 @@
|
||||
#include "src/opts/SkUtils_opts.h"
|
||||
#include "src/opts/SkXfermode_opts.h"
|
||||
|
||||
#include "src/core/SkCubicSolver.h"
|
||||
|
||||
namespace SkOpts {
|
||||
// Define default function pointer values here...
|
||||
// If our global compile options are set high enough, these defaults might even be
|
||||
@ -77,6 +79,8 @@ namespace SkOpts {
|
||||
DEFINE_DEFAULT(rect_memset32);
|
||||
DEFINE_DEFAULT(rect_memset64);
|
||||
|
||||
DEFINE_DEFAULT(cubic_solver);
|
||||
|
||||
DEFINE_DEFAULT(hash_fn);
|
||||
|
||||
DEFINE_DEFAULT(S32_alpha_D32_filter_DX);
|
||||
|
@ -52,6 +52,8 @@ namespace SkOpts {
|
||||
extern void (*rect_memset32)(uint32_t[], uint32_t, int, size_t, int);
|
||||
extern void (*rect_memset64)(uint64_t[], uint64_t, int, size_t, int);
|
||||
|
||||
extern float (*cubic_solver)(float, float, float, float);
|
||||
|
||||
// The fastest high quality 32-bit hash we can provide on this platform.
|
||||
extern uint32_t (*hash_fn)(const void*, size_t, uint32_t seed);
|
||||
static inline uint32_t hash(const void* data, size_t bytes, uint32_t seed=0) {
|
||||
|
@ -11,12 +11,15 @@
|
||||
#include "src/opts/SkBlitRow_opts.h"
|
||||
#include "src/opts/SkRasterPipeline_opts.h"
|
||||
#include "src/opts/SkUtils_opts.h"
|
||||
#include "src/core/SkCubicSolver.h"
|
||||
|
||||
namespace SkOpts {
|
||||
void Init_hsw() {
|
||||
blit_row_color32 = hsw::blit_row_color32;
|
||||
blit_row_s32a_opaque = hsw::blit_row_s32a_opaque;
|
||||
|
||||
cubic_solver = SK_OPTS_NS::cubic_solver;
|
||||
|
||||
#define M(st) stages_highp[SkRasterPipeline::st] = (StageFn)SK_OPTS_NS::st;
|
||||
SK_RASTER_PIPELINE_STAGES(M)
|
||||
just_return_highp = (StageFn)SK_OPTS_NS::just_return;
|
||||
|
Loading…
Reference in New Issue
Block a user