Optimize SkChopCubicAt to chop at two points at once
Adds an SkChopCubicAt overload that performs two chops at once in SIMD. Also updates SkChopCubicAt to accept T values of 0 and 1. This has been the source of bugs in the past. Bug: skia:10419 Change-Id: Ic8a482a69192fb1685f3766411cbdceed830f9b7 Reviewed-on: https://skia-review.googlesource.com/c/skia/+/327436 Reviewed-by: Mike Reed <reed@google.com> Commit-Queue: Chris Dalton <csmartdalton@google.com>
This commit is contained in:
parent
36af02d2ef
commit
81b270a659
@ -476,6 +476,11 @@ SINTU Vec<N,T> max(const Vec<N,T>& x, U y) { return max(x, Vec<N,T>(y)); }
|
|||||||
SINTU Vec<N,T> min(U x, const Vec<N,T>& y) { return min(Vec<N,T>(x), y); }
|
SINTU Vec<N,T> min(U x, const Vec<N,T>& y) { return min(Vec<N,T>(x), y); }
|
||||||
SINTU Vec<N,T> max(U x, const Vec<N,T>& y) { return max(Vec<N,T>(x), y); }
|
SINTU Vec<N,T> max(U x, const Vec<N,T>& y) { return max(Vec<N,T>(x), y); }
|
||||||
|
|
||||||
|
// pin matches the logic of SkTPin, which is important when NaN is involved. It always returns
|
||||||
|
// values in the range lo..hi, and if x is NaN, it returns lo.
|
||||||
|
SINT Vec<N,T> pin(const Vec<N,T>& x, const Vec<N,T>& lo, const Vec<N,T>& hi) {
|
||||||
|
return max(lo, min(x, hi));
|
||||||
|
}
|
||||||
|
|
||||||
// Shuffle values from a vector pretty arbitrarily:
|
// Shuffle values from a vector pretty arbitrarily:
|
||||||
// skvx::Vec<4,float> rgba = {R,G,B,A};
|
// skvx::Vec<4,float> rgba = {R,G,B,A};
|
||||||
|
@ -9,6 +9,7 @@
|
|||||||
#include "include/core/SkPoint3.h"
|
#include "include/core/SkPoint3.h"
|
||||||
#include "include/private/SkNx.h"
|
#include "include/private/SkNx.h"
|
||||||
#include "include/private/SkTPin.h"
|
#include "include/private/SkTPin.h"
|
||||||
|
#include "include/private/SkVx.h"
|
||||||
#include "src/core/SkGeometry.h"
|
#include "src/core/SkGeometry.h"
|
||||||
#include "src/core/SkPointPriv.h"
|
#include "src/core/SkPointPriv.h"
|
||||||
|
|
||||||
@ -442,92 +443,122 @@ int SkFindCubicExtrema(SkScalar a, SkScalar b, SkScalar c, SkScalar d,
|
|||||||
return SkFindUnitQuadRoots(A, B, C, tValues);
|
return SkFindUnitQuadRoots(A, B, C, tValues);
|
||||||
}
|
}
|
||||||
|
|
||||||
void SkChopCubicAt(const SkPoint src[4], SkPoint dst[7], SkScalar t) {
|
// This does not return b when t==1, but it otherwise seems to get better precision than
|
||||||
SkASSERT(t > 0 && t < SK_Scalar1);
|
// "a*(1 - t) + b*t" for things like chopping cubics on exact cusp points.
|
||||||
|
// The responsibility falls on the caller to check that t != 1 before calling.
|
||||||
Sk2s p0 = from_point(src[0]);
|
template<int N, typename T>
|
||||||
Sk2s p1 = from_point(src[1]);
|
inline static skvx::Vec<N,T> unchecked_mix(const skvx::Vec<N,T>& a, const skvx::Vec<N,T>& b,
|
||||||
Sk2s p2 = from_point(src[2]);
|
const skvx::Vec<N,T>& t) {
|
||||||
Sk2s p3 = from_point(src[3]);
|
return (b - a)*t + a;
|
||||||
Sk2s tt(t);
|
|
||||||
|
|
||||||
Sk2s ab = interp(p0, p1, tt);
|
|
||||||
Sk2s bc = interp(p1, p2, tt);
|
|
||||||
Sk2s cd = interp(p2, p3, tt);
|
|
||||||
Sk2s abc = interp(ab, bc, tt);
|
|
||||||
Sk2s bcd = interp(bc, cd, tt);
|
|
||||||
Sk2s abcd = interp(abc, bcd, tt);
|
|
||||||
|
|
||||||
dst[0] = to_point(p0);
|
|
||||||
dst[1] = to_point(ab);
|
|
||||||
dst[2] = to_point(abc);
|
|
||||||
dst[3] = to_point(abcd);
|
|
||||||
dst[4] = to_point(bcd);
|
|
||||||
dst[5] = to_point(cd);
|
|
||||||
dst[6] = to_point(p3);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* http://code.google.com/p/skia/issues/detail?id=32
|
void SkChopCubicAt(const SkPoint src[4], SkPoint dst[7], SkScalar t) {
|
||||||
|
using float2 = skvx::Vec<2,float>;
|
||||||
|
SkASSERT(0 <= t && t <= 1);
|
||||||
|
|
||||||
This test code would fail when we didn't check the return result of
|
if (t == 1) {
|
||||||
valid_unit_divide in SkChopCubicAt(... tValues[], int roots). The reason is
|
memcpy(dst, src, sizeof(SkPoint) * 4);
|
||||||
that after the first chop, the parameters to valid_unit_divide are equal
|
dst[4] = dst[5] = dst[6] = src[3];
|
||||||
(thanks to finite float precision and rounding in the subtracts). Thus
|
return;
|
||||||
even though the 2nd tValue looks < 1.0, after we renormalize it, we end
|
|
||||||
up with 1.0, hence the need to check and just return the last cubic as
|
|
||||||
a degenerate clump of 4 points in the sampe place.
|
|
||||||
|
|
||||||
static void test_cubic() {
|
|
||||||
SkPoint src[4] = {
|
|
||||||
{ 556.25000, 523.03003 },
|
|
||||||
{ 556.23999, 522.96002 },
|
|
||||||
{ 556.21997, 522.89001 },
|
|
||||||
{ 556.21997, 522.82001 }
|
|
||||||
};
|
|
||||||
SkPoint dst[10];
|
|
||||||
SkScalar tval[] = { 0.33333334f, 0.99999994f };
|
|
||||||
SkChopCubicAt(src, dst, tval, 2);
|
|
||||||
}
|
}
|
||||||
*/
|
|
||||||
|
float2 p0 = skvx::bit_pun<float2>(src[0]);
|
||||||
|
float2 p1 = skvx::bit_pun<float2>(src[1]);
|
||||||
|
float2 p2 = skvx::bit_pun<float2>(src[2]);
|
||||||
|
float2 p3 = skvx::bit_pun<float2>(src[3]);
|
||||||
|
float2 T = t;
|
||||||
|
|
||||||
|
float2 ab = unchecked_mix(p0, p1, T);
|
||||||
|
float2 bc = unchecked_mix(p1, p2, T);
|
||||||
|
float2 cd = unchecked_mix(p2, p3, T);
|
||||||
|
float2 abc = unchecked_mix(ab, bc, T);
|
||||||
|
float2 bcd = unchecked_mix(bc, cd, T);
|
||||||
|
float2 abcd = unchecked_mix(abc, bcd, T);
|
||||||
|
|
||||||
|
dst[0] = skvx::bit_pun<SkPoint>(p0);
|
||||||
|
dst[1] = skvx::bit_pun<SkPoint>(ab);
|
||||||
|
dst[2] = skvx::bit_pun<SkPoint>(abc);
|
||||||
|
dst[3] = skvx::bit_pun<SkPoint>(abcd);
|
||||||
|
dst[4] = skvx::bit_pun<SkPoint>(bcd);
|
||||||
|
dst[5] = skvx::bit_pun<SkPoint>(cd);
|
||||||
|
dst[6] = skvx::bit_pun<SkPoint>(p3);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SkChopCubicAt(const SkPoint src[4], SkPoint dst[10], float t0, float t1) {
|
||||||
|
using float4 = skvx::Vec<4,float>;
|
||||||
|
using float2 = skvx::Vec<2,float>;
|
||||||
|
SkASSERT(0 <= t0 && t0 <= t1 && t1 <= 1);
|
||||||
|
|
||||||
|
if (t1 == 1) {
|
||||||
|
SkChopCubicAt(src, dst, t0);
|
||||||
|
dst[7] = dst[8] = dst[9] = src[3];
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Perform both chops in parallel using 4-lane SIMD.
|
||||||
|
float4 p00, p11, p22, p33, T;
|
||||||
|
p00.lo = p00.hi = skvx::bit_pun<float2>(src[0]);
|
||||||
|
p11.lo = p11.hi = skvx::bit_pun<float2>(src[1]);
|
||||||
|
p22.lo = p22.hi = skvx::bit_pun<float2>(src[2]);
|
||||||
|
p33.lo = p33.hi = skvx::bit_pun<float2>(src[3]);
|
||||||
|
T.lo = t0;
|
||||||
|
T.hi = t1;
|
||||||
|
|
||||||
|
float4 ab = unchecked_mix(p00, p11, T);
|
||||||
|
float4 bc = unchecked_mix(p11, p22, T);
|
||||||
|
float4 cd = unchecked_mix(p22, p33, T);
|
||||||
|
float4 abc = unchecked_mix(ab, bc, T);
|
||||||
|
float4 bcd = unchecked_mix(bc, cd, T);
|
||||||
|
float4 abcd = unchecked_mix(abc, bcd, T);
|
||||||
|
float4 middle = unchecked_mix(abc, bcd, skvx::shuffle<2,3,0,1>(T));
|
||||||
|
|
||||||
|
dst[0] = skvx::bit_pun<SkPoint>(p00.lo);
|
||||||
|
dst[1] = skvx::bit_pun<SkPoint>(ab.lo);
|
||||||
|
dst[2] = skvx::bit_pun<SkPoint>(abc.lo);
|
||||||
|
dst[3] = skvx::bit_pun<SkPoint>(abcd.lo);
|
||||||
|
middle.store(dst + 4);
|
||||||
|
dst[6] = skvx::bit_pun<SkPoint>(abcd.hi);
|
||||||
|
dst[7] = skvx::bit_pun<SkPoint>(bcd.hi);
|
||||||
|
dst[8] = skvx::bit_pun<SkPoint>(cd.hi);
|
||||||
|
dst[9] = skvx::bit_pun<SkPoint>(p33.hi);
|
||||||
|
}
|
||||||
|
|
||||||
void SkChopCubicAt(const SkPoint src[4], SkPoint dst[],
|
void SkChopCubicAt(const SkPoint src[4], SkPoint dst[],
|
||||||
const SkScalar tValues[], int roots) {
|
const SkScalar tValues[], int tCount) {
|
||||||
|
using float2 = skvx::Vec<2,float>;
|
||||||
|
|
||||||
#ifdef SK_DEBUG
|
#ifdef SK_DEBUG
|
||||||
{
|
float lastT = 0;
|
||||||
for (int i = 0; i < roots - 1; i++)
|
for (int i = 0; i < tCount; i++) {
|
||||||
{
|
SkASSERT(lastT <= tValues[i] && tValues[i] <= 1);
|
||||||
SkASSERT(0 < tValues[i] && tValues[i] < 1);
|
lastT = tValues[i];
|
||||||
SkASSERT(0 < tValues[i+1] && tValues[i+1] < 1);
|
|
||||||
SkASSERT(tValues[i] < tValues[i+1]);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (dst) {
|
if (dst) {
|
||||||
if (roots == 0) { // nothing to chop
|
if (tCount == 0) { // nothing to chop
|
||||||
memcpy(dst, src, 4*sizeof(SkPoint));
|
memcpy(dst, src, 4*sizeof(SkPoint));
|
||||||
} else {
|
} else {
|
||||||
SkScalar t = tValues[0];
|
int i = 0;
|
||||||
SkPoint tmp[4];
|
for (; i < tCount - 1; i += 2) {
|
||||||
|
// Do two chops at once.
|
||||||
for (int i = 0; i < roots; i++) {
|
float2 tt = float2::Load(tValues + i);
|
||||||
|
if (i != 0) {
|
||||||
|
float lastT = tValues[i - 1];
|
||||||
|
tt = skvx::pin((tt - lastT) / (1 - lastT), float2(0), float2(1));
|
||||||
|
}
|
||||||
|
SkChopCubicAt(src, dst, tt[0], tt[1]);
|
||||||
|
src = dst = dst + 6;
|
||||||
|
}
|
||||||
|
if (i < tCount) {
|
||||||
|
// Chop the final cubic if there was an odd number of chops.
|
||||||
|
SkASSERT(i + 1 == tCount);
|
||||||
|
float t = tValues[i];
|
||||||
|
if (i != 0) {
|
||||||
|
float lastT = tValues[i - 1];
|
||||||
|
t = SkTPin((t - lastT) / (1 - lastT), 0.f, 1.f);
|
||||||
|
}
|
||||||
SkChopCubicAt(src, dst, t);
|
SkChopCubicAt(src, dst, t);
|
||||||
if (i == roots - 1) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
dst += 3;
|
|
||||||
// have src point to the remaining cubic (after the chop)
|
|
||||||
memcpy(tmp, dst, 4 * sizeof(SkPoint));
|
|
||||||
src = tmp;
|
|
||||||
|
|
||||||
// watch out in case the renormalized t isn't in range
|
|
||||||
if (!valid_unit_divide(tValues[i+1] - tValues[i],
|
|
||||||
SK_Scalar1 - tValues[i], &t)) {
|
|
||||||
// if we can't, just create a degenerate cubic
|
|
||||||
dst[4] = dst[5] = dst[6] = src[3];
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -133,13 +133,19 @@ void SkEvalCubicAt(const SkPoint src[4], SkScalar t, SkPoint* locOrNull,
|
|||||||
SkVector* tangentOrNull, SkVector* curvatureOrNull);
|
SkVector* tangentOrNull, SkVector* curvatureOrNull);
|
||||||
|
|
||||||
/** Given a src cubic bezier, chop it at the specified t value,
|
/** Given a src cubic bezier, chop it at the specified t value,
|
||||||
where 0 < t < 1, and return the two new cubics in dst:
|
where 0 <= t <= 1, and return the two new cubics in dst:
|
||||||
dst[0..3] and dst[3..6]
|
dst[0..3] and dst[3..6]
|
||||||
*/
|
*/
|
||||||
void SkChopCubicAt(const SkPoint src[4], SkPoint dst[7], SkScalar t);
|
void SkChopCubicAt(const SkPoint src[4], SkPoint dst[7], SkScalar t);
|
||||||
|
|
||||||
|
/** Given a src cubic bezier, chop it at the specified t0 and t1 values,
|
||||||
|
where 0 <= t0 <= t1 <= 1, and return the three new cubics in dst:
|
||||||
|
dst[0..3], dst[3..6], and dst[6..9]
|
||||||
|
*/
|
||||||
|
void SkChopCubicAt(const SkPoint src[4], SkPoint dst[10], float t0, float t1);
|
||||||
|
|
||||||
/** Given a src cubic bezier, chop it at the specified t values,
|
/** Given a src cubic bezier, chop it at the specified t values,
|
||||||
where 0 < t < 1, and return the new cubics in dst:
|
where 0 <= t0 <= t1 <= ... <= 1, and return the new cubics in dst:
|
||||||
dst[0..3],dst[3..6],...,dst[3*t_count..3*(t_count+1)]
|
dst[0..3],dst[3..6],...,dst[3*t_count..3*(t_count+1)]
|
||||||
*/
|
*/
|
||||||
void SkChopCubicAt(const SkPoint src[4], SkPoint dst[], const SkScalar t[],
|
void SkChopCubicAt(const SkPoint src[4], SkPoint dst[], const SkScalar t[],
|
||||||
|
@ -46,6 +46,72 @@ static void testChopCubic(skiatest::Reporter* reporter) {
|
|||||||
REPORTER_ASSERT(reporter, pts[i].fX == pts[i].fY);
|
REPORTER_ASSERT(reporter, pts[i].fX == pts[i].fY);
|
||||||
REPORTER_ASSERT(reporter, pts[i].fX == i * .5f);
|
REPORTER_ASSERT(reporter, pts[i].fX == i * .5f);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const float chopTs[] = {
|
||||||
|
0, 3/83.f, 3/79.f, 3/73.f, 3/71.f, 3/67.f, 3/61.f, 3/59.f, 3/53.f, 3/47.f, 3/43.f, 3/41.f,
|
||||||
|
3/37.f, 3/31.f, 3/29.f, 3/23.f, 3/19.f, 3/17.f, 3/13.f, 3/11.f, 3/7.f, 3/5.f, 1,
|
||||||
|
};
|
||||||
|
float ones[] = {1,1,1,1,1};
|
||||||
|
|
||||||
|
// Ensure an odd number of T values so we exercise the single chop code at the end of
|
||||||
|
// SkChopCubicAt form multiple T.
|
||||||
|
static_assert(SK_ARRAY_COUNT(chopTs) % 2 == 1);
|
||||||
|
static_assert(SK_ARRAY_COUNT(ones) % 2 == 1);
|
||||||
|
|
||||||
|
SkRandom rand;
|
||||||
|
for (int iterIdx = 0; iterIdx < 5; ++iterIdx) {
|
||||||
|
SkPoint pts[4] = {{rand.nextF(), rand.nextF()}, {rand.nextF(), rand.nextF()},
|
||||||
|
{rand.nextF(), rand.nextF()}, {rand.nextF(), rand.nextF()}};
|
||||||
|
|
||||||
|
SkPoint allChops[4 + SK_ARRAY_COUNT(chopTs)*3];
|
||||||
|
SkChopCubicAt(pts, allChops, chopTs, SK_ARRAY_COUNT(chopTs));
|
||||||
|
int i = 3;
|
||||||
|
for (float chopT : chopTs) {
|
||||||
|
// Ensure we chop at approximately the correct points when we chop an entire list.
|
||||||
|
SkPoint expectedPt;
|
||||||
|
SkEvalCubicAt(pts, chopT, &expectedPt, nullptr, nullptr);
|
||||||
|
REPORTER_ASSERT(reporter, SkScalarNearlyEqual(allChops[i].x(), expectedPt.x()));
|
||||||
|
REPORTER_ASSERT(reporter, SkScalarNearlyEqual(allChops[i].y(), expectedPt.y()));
|
||||||
|
if (chopT == 0) {
|
||||||
|
REPORTER_ASSERT(reporter, allChops[i] == pts[0]);
|
||||||
|
}
|
||||||
|
if (chopT == 1) {
|
||||||
|
REPORTER_ASSERT(reporter, allChops[i] == pts[3]);
|
||||||
|
}
|
||||||
|
i += 3;
|
||||||
|
|
||||||
|
// Ensure the middle is exactly degenerate when we chop at two equal points.
|
||||||
|
SkPoint localChops[10];
|
||||||
|
SkChopCubicAt(pts, localChops, chopT, chopT);
|
||||||
|
REPORTER_ASSERT(reporter, localChops[3] == localChops[4]);
|
||||||
|
REPORTER_ASSERT(reporter, localChops[3] == localChops[5]);
|
||||||
|
REPORTER_ASSERT(reporter, localChops[3] == localChops[6]);
|
||||||
|
if (chopT == 0) {
|
||||||
|
// Also ensure the first curve is exactly p0 when we chop at T=0.
|
||||||
|
REPORTER_ASSERT(reporter, localChops[0] == pts[0]);
|
||||||
|
REPORTER_ASSERT(reporter, localChops[1] == pts[0]);
|
||||||
|
REPORTER_ASSERT(reporter, localChops[2] == pts[0]);
|
||||||
|
REPORTER_ASSERT(reporter, localChops[3] == pts[0]);
|
||||||
|
}
|
||||||
|
if (chopT == 1) {
|
||||||
|
// Also ensure the last curve is exactly p3 when we chop at T=1.
|
||||||
|
REPORTER_ASSERT(reporter, localChops[6] == pts[3]);
|
||||||
|
REPORTER_ASSERT(reporter, localChops[7] == pts[3]);
|
||||||
|
REPORTER_ASSERT(reporter, localChops[8] == pts[3]);
|
||||||
|
REPORTER_ASSERT(reporter, localChops[9] == pts[3]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now test what happens when SkChopCubicAt does 0/0 and gets NaN values.
|
||||||
|
SkPoint oneChops[4 + SK_ARRAY_COUNT(ones)*3];
|
||||||
|
SkChopCubicAt(pts, oneChops, ones, SK_ARRAY_COUNT(ones));
|
||||||
|
REPORTER_ASSERT(reporter, oneChops[0] == pts[0]);
|
||||||
|
REPORTER_ASSERT(reporter, oneChops[1] == pts[1]);
|
||||||
|
REPORTER_ASSERT(reporter, oneChops[2] == pts[2]);
|
||||||
|
for (size_t i = 3; i < SK_ARRAY_COUNT(oneChops); ++i) {
|
||||||
|
REPORTER_ASSERT(reporter, oneChops[i] == pts[3]);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void check_pairs(skiatest::Reporter* reporter, int index, SkScalar t, const char name[],
|
static void check_pairs(skiatest::Reporter* reporter, int index, SkScalar t, const char name[],
|
||||||
|
Loading…
Reference in New Issue
Block a user