skia2/tests/SkNxTest.cpp

/*
 * Copyright 2015 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "SkNx.h"
#include "Test.h"

// Exercises SkNf<N,T> for one lane count N and element type T: load, copy, assignment,
// arithmetic, sqrt/rsqrt, invert/approxInvert, Min/Max and a handful of comparisons.
// Only the first N of the four expected lane values are checked (N is 2 or 4 below).
template <int N, typename T>
static void test_Nf(skiatest::Reporter* r) {

    // Checks v lane-by-lane against (a, b, c, d), allowing an absolute error of eps.
    // Each lane is read twice: once through store() and once through kth<k>().
    auto assert_nearly_eq = [&](double eps, const SkNf<N,T>& v, T a, T b, T c, T d) {
        auto within = [=](T got, T want) { return fabs(got - want) <= eps; };

        T lanes[4];
        v.store(lanes);

        // Lanes 0 and 1 are checked for every N this test is instantiated with.
        bool ok = within(lanes[0], a)
               && within(lanes[1], b)
               && within(v.template kth<0>(), a)
               && within(v.template kth<1>(), b);
        REPORTER_ASSERT(r, ok);

        // Lanes 2 and 3 are only checked for the 4-wide vector.
        if (N != 4) {
            return;
        }
        ok = within(lanes[2], c)
          && within(lanes[3], d)
          && within(v.template kth<2>(), c)
          && within(v.template kth<3>(), d);
        REPORTER_ASSERT(r, ok);
    };

    // Exact comparison: the same check with a tolerance of zero.
    auto assert_eq = [&](const SkNf<N,T>& v, T a, T b, T c, T d) {
        assert_nearly_eq(0, v, a,b,c,d);
    };

    // Build the same vector four ways: Load, copy-construct, copy-initialize, assign.
    T src[] = {3, 4, 5, 6};
    SkNf<N,T> a = SkNf<N,T>::Load(src);
    SkNf<N,T> b(a);
    SkNf<N,T> c = a;
    SkNf<N,T> d;
    d = a;

    assert_eq(a, 3, 4, 5, 6);
    assert_eq(b, 3, 4, 5, 6);
    assert_eq(c, 3, 4, 5, 6);
    assert_eq(d, 3, 4, 5, 6);

    // Lane-wise arithmetic.
    assert_eq(a+b, 6, 8, 10, 12);
    assert_eq(a*b, 9, 16, 25, 36);
    assert_eq(a*b-b, 6, 12, 20, 30);
    assert_eq((a*b).sqrt(), 3, 4, 5, 6);
    assert_eq(a/b, 1, 1, 1, 1);
    assert_eq(-a, -3, -4, -5, -6);

    // A vector with 4 in every lane.
    SkNf<N,T> fours(4);

    // sqrt() and invert() are compared exactly; rsqrt() and approxInvert() only to 0.001.
    assert_eq(fours.sqrt(), 2,2,2,2);
    assert_nearly_eq(0.001, fours.rsqrt(), 0.5, 0.5, 0.5, 0.5);

    assert_eq(fours.invert(), 0.25, 0.25, 0.25, 0.25);
    assert_nearly_eq(0.001, fours.approxInvert(), 0.25, 0.25, 0.25, 0.25);

    assert_eq(SkNf<N,T>::Min(a, fours), 3, 4, 4, 4);
    assert_eq(SkNf<N,T>::Max(a, fours), 4, 4, 5, 6);

    // Spot-check the comparison operators together with allTrue()/anyTrue().
    // This is a sample, not exhaustive coverage.
    REPORTER_ASSERT(r, (a == b).allTrue());
    REPORTER_ASSERT(r, (a+b == a*b-b).anyTrue());
    REPORTER_ASSERT(r, !(a+b == a*b-b).allTrue());
    REPORTER_ASSERT(r, !(a+b == a*b).anyTrue());
    REPORTER_ASSERT(r, !(a != b).anyTrue());
    REPORTER_ASSERT(r, (a < fours).anyTrue());
    REPORTER_ASSERT(r, (a <= fours).anyTrue());
    REPORTER_ASSERT(r, !(a > fours).allTrue());
    REPORTER_ASSERT(r, !(a >= fours).allTrue());
}

// Runs test_Nf for 2- and 4-lane vectors, each over float and double elements.
DEF_TEST(SkNf, r) {
    // 2-wide vectors.
    test_Nf<2, float>(r);
    test_Nf<2, double>(r);

    // 4-wide vectors.
    test_Nf<4, float>(r);
    test_Nf<4, double>(r);
}

// Exercises SkNi<N,T> for one lane count N and integer element type T: load, copy,
// assignment, add/multiply/subtract and constant shifts. Only the first N of the eight
// expected lane values are checked.
template <int N, typename T>
void test_Ni(skiatest::Reporter* r) {
    // Checks v against the expected lanes a..h, first through store() and then through
    // kth<k>(). Lane groups are checked from the top pair down: 4-7 when N == 8,
    // 2-3 when N is 8 or 4, and 0-1 when N is 8, 4 or 2. Any other N checks nothing.
    auto assert_eq = [&](const SkNi<N,T>& v, T a, T b, T c, T d, T e, T f, T g, T h) {
        const bool has8 = (N == 8);
        const bool has4 = has8 || (N == 4);
        const bool has2 = has4 || (N == 2);

        T vals[8];
        v.store(vals);

        // Values as written out by store().
        if (has8) {
            REPORTER_ASSERT(r, vals[4] == e && vals[5] == f && vals[6] == g && vals[7] == h);
        }
        if (has4) {
            REPORTER_ASSERT(r, vals[2] == c && vals[3] == d);
        }
        if (has2) {
            REPORTER_ASSERT(r, vals[0] == a && vals[1] == b);
        }

        // The same lanes as read individually by kth<k>().
        if (has8) {
            REPORTER_ASSERT(r, v.template kth<4>() == e && v.template kth<5>() == f &&
                               v.template kth<6>() == g && v.template kth<7>() == h);
        }
        if (has4) {
            REPORTER_ASSERT(r, v.template kth<2>() == c && v.template kth<3>() == d);
        }
        if (has2) {
            REPORTER_ASSERT(r, v.template kth<0>() == a && v.template kth<1>() == b);
        }
    };

    // Build the same vector four ways: Load, copy-construct, copy-initialize, assign.
    T src[] = { 1,2,3,4,5,6,7,8 };
    SkNi<N,T> a = SkNi<N,T>::Load(src);
    SkNi<N,T> b(a);
    SkNi<N,T> c = a;
    SkNi<N,T> d;
    d = a;

    assert_eq(a, 1,2,3,4,5,6,7,8);
    assert_eq(b, 1,2,3,4,5,6,7,8);
    assert_eq(c, 1,2,3,4,5,6,7,8);
    assert_eq(d, 1,2,3,4,5,6,7,8);

    // Lane-wise arithmetic.
    assert_eq(a+a, 2,4,6,8,10,12,14,16);
    assert_eq(a*a, 1,4,9,16,25,36,49,64);
    assert_eq(a*a-a, 0,2,6,12,20,30,42,56);

    // Shifts by a constant amount.
    assert_eq(a >> 2, 0,0,0,1,1,1,1,2);
    assert_eq(a << 1, 2,4,6,8,10,12,14,16);

    // Direct single-lane read.
    REPORTER_ASSERT(r, a.template kth<1>() == 2);
}

// Runs test_Ni for 2-, 4- and 8-lane vectors over uint16_t and int elements.
DEF_TEST(SkNi, r) {
    // uint16_t lanes.
    test_Ni<2, uint16_t>(r);
    test_Ni<4, uint16_t>(r);
    test_Ni<8, uint16_t>(r);

    // int lanes.
    test_Ni<2, int>(r);
    test_Ni<4, int>(r);
    test_Ni<8, int>(r);
}
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>. The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc. This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h. This design leaves us room to grow up, e.g. to SkNf<8, SkScalar> == Sk8s, and to grow down too, to things like SkNi<8, uint16_t> == Sk8h. To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful. You shouldn't feel bad about using any of the typedefs Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel: - Sk4f, Sk4s, Sk2d: feel awesome - Sk2f, Sk2s, Sk4d: feel pretty good No public API changes. TBR=reed@google.com BUG=skia:3592 Review URL: https://codereview.chromium.org/1048593002 2015-03-30 17:50:27 +00:00			`/*`
			`* Copyright 2015 Google Inc.`
			`*`
			`* Use of this source code is governed by a BSD-style license that can be`
			`* found in the LICENSE file.`
			`*/`

			`#include "SkNx.h"`
			`#include "Test.h"`

			`template <int N, typename T>`
			`static void test_Nf(skiatest::Reporter* r) {`

			`auto assert_nearly_eq = [&](double eps, const SkNf<N,T>& v, T a, T b, T c, T d) {`
			`auto close = [=](T a, T b) { return fabs(a-b) <= eps; };`
			`T vals[4];`
			`v.store(vals);`
Switch operator[](int) to kth<int>() so we can use vget_lane. #floats BUG=skia: BUG=skia:3592 Review URL: https://codereview.chromium.org/1059743002 2015-04-03 13:16:13 +00:00			`bool ok = close(vals[0], a) && close(vals[1], b)`
			`&& close(v.template kth<0>(), a) && close(v.template kth<1>(), b);`
			`REPORTER_ASSERT(r, ok);`
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>. The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc. This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h. This design leaves us room to grow up, e.g. to SkNf<8, SkScalar> == Sk8s, and to grow down too, to things like SkNi<8, uint16_t> == Sk8h. To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful. You shouldn't feel bad about using any of the typedefs Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel: - Sk4f, Sk4s, Sk2d: feel awesome - Sk2f, Sk2s, Sk4d: feel pretty good No public API changes. TBR=reed@google.com BUG=skia:3592 Review URL: https://codereview.chromium.org/1048593002 2015-03-30 17:50:27 +00:00			`if (N == 4) {`
Switch operator[](int) to kth<int>() so we can use vget_lane. #floats BUG=skia: BUG=skia:3592 Review URL: https://codereview.chromium.org/1059743002 2015-04-03 13:16:13 +00:00			`ok = close(vals[2], c) && close(vals[3], d)`
			`&& close(v.template kth<2>(), c) && close(v.template kth<3>(), d);`
			`REPORTER_ASSERT(r, ok);`
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>. The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc. This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h. This design leaves us room to grow up, e.g. to SkNf<8, SkScalar> == Sk8s, and to grow down too, to things like SkNi<8, uint16_t> == Sk8h. To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful. You shouldn't feel bad about using any of the typedefs Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel: - Sk4f, Sk4s, Sk2d: feel awesome - Sk2f, Sk2s, Sk4d: feel pretty good No public API changes. TBR=reed@google.com BUG=skia:3592 Review URL: https://codereview.chromium.org/1048593002 2015-03-30 17:50:27 +00:00			`}`
			`};`
			`auto assert_eq = [&](const SkNf<N,T>& v, T a, T b, T c, T d) {`
			`return assert_nearly_eq(0, v, a,b,c,d);`
			`};`

			`T vals[] = {3, 4, 5, 6};`
			`SkNf<N,T> a = SkNf<N,T>::Load(vals),`
			`b(a),`
			`c = a;`
			`SkNf<N,T> d;`
			`d = a;`

			`assert_eq(a, 3, 4, 5, 6);`
			`assert_eq(b, 3, 4, 5, 6);`
			`assert_eq(c, 3, 4, 5, 6);`
			`assert_eq(d, 3, 4, 5, 6);`

			`assert_eq(a+b, 6, 8, 10, 12);`
			`assert_eq(a*b, 9, 16, 25, 36);`
			`assert_eq(a*b-b, 6, 12, 20, 30);`
			`assert_eq((a*b).sqrt(), 3, 4, 5, 6);`
			`assert_eq(a/b, 1, 1, 1, 1);`
			`assert_eq(-a, -3, -4, -5, -6);`

			`SkNf<N,T> fours(4);`

			`assert_eq(fours.sqrt(), 2,2,2,2);`
Revert of Split rsqrt into rsqrt{0,1,2}, with increasing cost and precision on ARM (patchset #2 id:20001 of https://codereview.chromium.org/1109913002/) Reason for revert: arm64 typos Original issue's description: > Split rsqrt into rsqrt{0,1,2}, with increasing cost and precision on ARM > > This is a logical no-op. Everything was using the equivalent of rsqrt1() before, and is now after. > > BUG=skia: > > Committed: https://skia.googlesource.com/skia/+/9de16283fdc8cc0d31a84f503578d0ecea4e8297 TBR=reed@google.com,mtklein@chromium.org NOPRESUBMIT=true NOTREECHECKS=true NOTRY=true BUG=skia: Review URL: https://codereview.chromium.org/1105233003 2015-04-27 20:55:53 +00:00			`assert_nearly_eq(0.001, fours.rsqrt(), 0.5, 0.5, 0.5, 0.5);`
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>. The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc. This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h. This design leaves us room to grow up, e.g. to SkNf<8, SkScalar> == Sk8s, and to grow down too, to things like SkNi<8, uint16_t> == Sk8h. To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful. You shouldn't feel bad about using any of the typedefs Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel: - Sk4f, Sk4s, Sk2d: feel awesome - Sk2f, Sk2s, Sk4d: feel pretty good No public API changes. TBR=reed@google.com BUG=skia:3592 Review URL: https://codereview.chromium.org/1048593002 2015-03-30 17:50:27 +00:00
			`assert_eq( fours. invert(), 0.25, 0.25, 0.25, 0.25);`
			`assert_nearly_eq(0.001, fours.approxInvert(), 0.25, 0.25, 0.25, 0.25);`

			`assert_eq(SkNf<N,T>::Min(a, fours), 3, 4, 4, 4);`
			`assert_eq(SkNf<N,T>::Max(a, fours), 4, 4, 5, 6);`

			`// Test some comparisons. This is not exhaustive.`
			`REPORTER_ASSERT(r, (a == b).allTrue());`
			`REPORTER_ASSERT(r, (a+b == a*b-b).anyTrue());`
			`REPORTER_ASSERT(r, !(a+b == a*b-b).allTrue());`
			`REPORTER_ASSERT(r, !(a+b == a*b).anyTrue());`
			`REPORTER_ASSERT(r, !(a != b).anyTrue());`
			`REPORTER_ASSERT(r, (a < fours).anyTrue());`
			`REPORTER_ASSERT(r, (a <= fours).anyTrue());`
			`REPORTER_ASSERT(r, !(a > fours).allTrue());`
			`REPORTER_ASSERT(r, !(a >= fours).allTrue());`
			`}`

			`DEF_TEST(SkNf, r) {`
			`test_Nf<2, float>(r);`
			`test_Nf<2, double>(r);`

			`test_Nf<4, float>(r);`
			`test_Nf<4, double>(r);`
			`}`
Sk4h and Sk8h for SSE. These will underlie the SkPMFloat-like class for uint16_t components. Sk4h will back a single-pixel version, and Sk8h any larger number than that. BUG=skia: Review URL: https://codereview.chromium.org/1088883005 2015-04-14 21:02:52 +00:00
			`template <int N, typename T>`
			`void test_Ni(skiatest::Reporter* r) {`
			`auto assert_eq = [&](const SkNi<N,T>& v, T a, T b, T c, T d, T e, T f, T g, T h) {`
			`T vals[8];`
			`v.store(vals);`

			`switch (N) {`
			`case 8: REPORTER_ASSERT(r, vals[4] == e && vals[5] == f && vals[6] == g && vals[7] == h);`
			`case 4: REPORTER_ASSERT(r, vals[2] == c && vals[3] == d);`
			`case 2: REPORTER_ASSERT(r, vals[0] == a && vals[1] == b);`
			`}`
Mike's radial gradient CL with better float -> int. patch from issue 1072303005 at patchset 40001 (http://crrev.com/1072303005#ps40001) This looks quite launchable. radial_gradient3, min of 100 samples: N5: 985µs -> 946µs MBP: 395µs -> 279µs On my MBP, most of the meat looks like it's now in reading the cache and writing to dst one color at a time. Is that something we could do in float math rather than with a lookup table? BUG=skia: CQ_EXTRA_TRYBOTS=client.skia.compile:Build-Mac10.8-Clang-Arm7-Debug-Android-Trybot,Build-Ubuntu-GCC-Arm7-Release-Android_NoNeon-Trybot Committed: https://skia.googlesource.com/skia/+/abf6c5cf95e921fae59efb487480e5b5081cf0ec Review URL: https://codereview.chromium.org/1109643002 2015-04-27 19:08:01 +00:00			`switch (N) {`
			`case 8: REPORTER_ASSERT(r, v.template kth<4>() == e && v.template kth<5>() == f &&`
			`v.template kth<6>() == g && v.template kth<7>() == h);`
			`case 4: REPORTER_ASSERT(r, v.template kth<2>() == c && v.template kth<3>() == d);`
			`case 2: REPORTER_ASSERT(r, v.template kth<0>() == a && v.template kth<1>() == b);`
			`}`
Sk4h and Sk8h for SSE. These will underlie the SkPMFloat-like class for uint16_t components. Sk4h will back a single-pixel version, and Sk8h any larger number than that. BUG=skia: Review URL: https://codereview.chromium.org/1088883005 2015-04-14 21:02:52 +00:00			`};`

			`T vals[] = { 1,2,3,4,5,6,7,8 };`
			`SkNi<N,T> a = SkNi<N,T>::Load(vals),`
			`b(a),`
			`c = a;`
			`SkNi<N,T> d;`
			`d = a;`

			`assert_eq(a, 1,2,3,4,5,6,7,8);`
			`assert_eq(b, 1,2,3,4,5,6,7,8);`
			`assert_eq(c, 1,2,3,4,5,6,7,8);`
			`assert_eq(d, 1,2,3,4,5,6,7,8);`

			`assert_eq(a+a, 2,4,6,8,10,12,14,16);`
			`assert_eq(a*a, 1,4,9,16,25,36,49,64);`
			`assert_eq(a*a-a, 0,2,6,12,20,30,42,56);`

			`assert_eq(a >> 2, 0,0,0,1,1,1,1,2);`
			`assert_eq(a << 1, 2,4,6,8,10,12,14,16);`

			`REPORTER_ASSERT(r, a.template kth<1>() == 2);`
			`}`

			`DEF_TEST(SkNi, r) {`
			`test_Ni<2, uint16_t>(r);`
			`test_Ni<4, uint16_t>(r);`
			`test_Ni<8, uint16_t>(r);`
Mike's radial gradient CL with better float -> int. patch from issue 1072303005 at patchset 40001 (http://crrev.com/1072303005#ps40001) This looks quite launchable. radial_gradient3, min of 100 samples: N5: 985µs -> 946µs MBP: 395µs -> 279µs On my MBP, most of the meat looks like it's now in reading the cache and writing to dst one color at a time. Is that something we could do in float math rather than with a lookup table? BUG=skia: CQ_EXTRA_TRYBOTS=client.skia.compile:Build-Mac10.8-Clang-Arm7-Debug-Android-Trybot,Build-Ubuntu-GCC-Arm7-Release-Android_NoNeon-Trybot Committed: https://skia.googlesource.com/skia/+/abf6c5cf95e921fae59efb487480e5b5081cf0ec Review URL: https://codereview.chromium.org/1109643002 2015-04-27 19:08:01 +00:00
			`test_Ni<2, int>(r);`
			`test_Ni<4, int>(r);`
			`test_Ni<8, int>(r);`
Sk4h and Sk8h for SSE. These will underlie the SkPMFloat-like class for uint16_t components. Sk4h will back a single-pixel version, and Sk8h any larger number than that. BUG=skia: Review URL: https://codereview.chromium.org/1088883005 2015-04-14 21:02:52 +00:00			`}`