Simplify grvx::approx_angle_between_vectors signature

Rather than taking the x and y values separately (ax, ay, bx, by), simply take two vec<N*2> arguments (a, b), where each argument holds its x values in .lo and its y values in .hi (e.g., a.lo and a.hi).

Bug: skia:10419
Change-Id: I21d659c79247ccb625351c20b93c550d0afffe79
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/339458
Reviewed-by: Brian Salomon <bsalomon@google.com>
Commit-Queue: Chris Dalton <csmartdalton@google.com>
2020-12-01 02:11:24 -07:00 · 2020-12-01 02:11:24 -07:00 · 356cef301b
commit 356cef301b
parent 540c13a791
2 changed files with 29 additions and 28 deletions
--- a/src/gpu/GrVx.h
+++ b/src/gpu/GrVx.h
@ -68,30 +68,30 @@ template<int N> SK_ALWAYS_INLINE vec<N> fast_madd(vec<N> f, vec<N> m, vec<N> a)
 // infinite at -1 and 1). So the input must still be clamped between -1 and 1.
 #define GRVX_FAST_ACOS_MAX_ERROR SkDegreesToRadians(.96f)
 template<int N> SK_ALWAYS_INLINE vec<N> approx_acos(vec<N> x) {
-    static const vec<N> a = -0.939115566365855f;
-    static const vec<N> b =  0.9217841528914573f;
-    static const vec<N> c = -1.2845906244690837f;
-    static const vec<N> d =  0.295624144969963174f;
-    static const vec<N> pi_over_2 = 1.5707963267948966f;
+    constexpr static float a = -0.939115566365855f;
+    constexpr static float b =  0.9217841528914573f;
+    constexpr static float c = -1.2845906244690837f;
+    constexpr static float d =  0.295624144969963174f;
+    constexpr static float pi_over_2 = 1.5707963267948966f;
    vec<N> xx = x*x;
-    vec<N> numer = fast_madd(b,xx,a);
-    vec<N> denom = fast_madd<N>(xx, fast_madd(d,xx,c), 1);
-    return fast_madd(x, numer/denom, pi_over_2);
+    vec<N> numer = fast_madd<N>(b,xx,a);
+    vec<N> denom = fast_madd<N>(xx, fast_madd<N>(d,xx,c), 1);
+    return fast_madd<N>(x, numer/denom, pi_over_2);
 }

-// Approximates the angle between a and b within .96 degrees (GRVX_FAST_ACOS_MAX_ERROR).
+// Approximates the angle between vectors a and b within .96 degrees (GRVX_FAST_ACOS_MAX_ERROR).
+// a (and b) represent "N" (Nx2/2) 2d vectors in SIMD, with the x values found in a.lo, and the
+// y values in a.hi.
 //
-// Due to fp32 overflow, this method is only valid for max(abs(ax), abs(ay)) and
-// max(abs(bx), abs(by)) in the range (2^-31, 2^31) exclusive. Results are undefined if the inputs
-// fall outside this range.
+// Due to fp32 overflow, this method is only valid for magnitudes in the range (2^-31, 2^31)
+// exclusive. Results are undefined if the inputs fall outside this range.
 //
 // NOTE: If necessary, we can extend our valid range to 2^(+/-63) by normalizing a and b separately.
 // i.e.: "cosTheta = dot(a,b) / sqrt(dot(a,a)) / sqrt(dot(b,b))".
-template<int N>
-SK_ALWAYS_INLINE vec<N> approx_angle_between_vectors(vec<N> ax, vec<N> ay, vec<N> bx, vec<N> by) {
-    vec<N> ab_cosTheta = fast_madd(ax, bx, ay*by);
-    vec<N> ab_pow2 = fast_madd(ay, ay, ax*ax) * fast_madd(by, by, bx*bx);
-    vec<N> cosTheta = ab_cosTheta / skvx::sqrt(ab_pow2);
+template<int Nx2>
+SK_ALWAYS_INLINE vec<Nx2/2> approx_angle_between_vectors(vec<Nx2> a, vec<Nx2> b) {
+    auto aa=a*a, bb=b*b, ab=a*b;
+    auto cosTheta = (ab.lo + ab.hi) / skvx::sqrt((aa.lo + aa.hi) * (bb.lo + bb.hi));
    // Clamp cosTheta such that if it is NaN (e.g., if a or b was 0), then we return acos(1) = 0.
    cosTheta = skvx::max(skvx::min(1, cosTheta), -1);
    return approx_acos(cosTheta);
--- a/tests/GrVxTest.cpp
+++ b/tests/GrVxTest.cpp
@ -170,31 +170,32 @@ static bool check_approx_angle_between_vectors(skiatest::Reporter* r, SkVector a
 }

 static bool check_approx_angle_between_vectors(skiatest::Reporter* r, SkVector a, SkVector b) {
-    float approxTheta = grvx::approx_angle_between_vectors<1>(a.fX, a.fY, b.fX, b.fY).val;
+    float approxTheta = grvx::approx_angle_between_vectors(bit_pun<float2>(a),
+                                                           bit_pun<float2>(b)).val;
    return check_approx_angle_between_vectors(r, a, b, approxTheta);
 }

 DEF_TEST(grvx_approx_angle_between_vectors, r) {
    // Test when a and/or b are zero.
-    REPORTER_ASSERT(r, SkScalarNearlyZero(grvx::approx_angle_between_vectors<1>(0,0,0,0).val));
-    REPORTER_ASSERT(r, SkScalarNearlyZero(grvx::approx_angle_between_vectors<1>(1,1,0,0).val));
-    REPORTER_ASSERT(r, SkScalarNearlyZero(grvx::approx_angle_between_vectors<1>(0,0,1,1).val));
+    REPORTER_ASSERT(r, SkScalarNearlyZero(grvx::approx_angle_between_vectors<2>({0,0}, {0,0}).val));
+    REPORTER_ASSERT(r, SkScalarNearlyZero(grvx::approx_angle_between_vectors<2>({1,1}, {0,0}).val));
+    REPORTER_ASSERT(r, SkScalarNearlyZero(grvx::approx_angle_between_vectors<2>({0,0}, {1,1}).val));
    check_approx_angle_between_vectors(r, {0,0}, {0,0});
    check_approx_angle_between_vectors(r, {1,1}, {0,0});
    check_approx_angle_between_vectors(r, {0,0}, {1,1});

    // Test infinities.
-    REPORTER_ASSERT(r, SkScalarNearlyZero(grvx::approx_angle_between_vectors<1>(
-            std::numeric_limits<float>::infinity(),1,2,3).val));
+    REPORTER_ASSERT(r, SkScalarNearlyZero(grvx::approx_angle_between_vectors<2>(
+            {std::numeric_limits<float>::infinity(),1}, {2,3}).val));

    // Test NaNs.
-    REPORTER_ASSERT(r, SkScalarNearlyZero(grvx::approx_angle_between_vectors<1>(
-            std::numeric_limits<float>::quiet_NaN(),1,2,3).val));
+    REPORTER_ASSERT(r, SkScalarNearlyZero(grvx::approx_angle_between_vectors<2>(
+            {std::numeric_limits<float>::quiet_NaN(),1}, {2,3}).val));

    // Test denorms.
    float epsilon = std::numeric_limits<float>::denorm_min();
-    REPORTER_ASSERT(r, SkScalarNearlyZero(grvx::approx_angle_between_vectors<1>(
-            epsilon, epsilon, epsilon, epsilon).val));
+    REPORTER_ASSERT(r, SkScalarNearlyZero(grvx::approx_angle_between_vectors<2>(
+            {epsilon, epsilon}, {epsilon, epsilon}).val));

    // Test random floats of all types.
    uint4 mantissas = {0,0,0,0};
@ -219,7 +220,7 @@ DEF_TEST(grvx_approx_angle_between_vectors, r) {
        float4 y0 = bit_pun<float4>(signs | y0exp | mantissas[1]);
        float4 x1 = bit_pun<float4>(signs | x1exp | mantissas[2]);
        float4 y1 = bit_pun<float4>(signs | y1exp | mantissas[3]);
-        float4 rads = approx_angle_between_vectors(x0, y0, x1, y1);
+        float4 rads = approx_angle_between_vectors(skvx::join(x0, y0), skvx::join(x1, y1));
        for (int j = 0; j < 4; ++j) {
            if (!check_approx_angle_between_vectors(r, {x0[j], y0[j]}, {x1[j], y1[j]}, rads[j])) {
                return;