33cbfd75af
Every type now nominally has Load4() and Store4() methods. The ones that we use are implemented. BUG=skia: GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=3046 CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot Change-Id: I7984f0c2063ef8acbc322bd2e968f8f7eaa0d8fd Reviewed-on: https://skia-review.googlesource.com/3046 Reviewed-by: Matt Sarett <msarett@google.com> Commit-Queue: Mike Klein <mtklein@chromium.org>
342 lines
10 KiB
C++
342 lines
10 KiB
C++
/*
 * Copyright 2015 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
|
|
|
|
#include "Sk4px.h"
|
|
#include "SkNx.h"
|
|
#include "SkRandom.h"
|
|
#include "Test.h"
|
|
|
|
template <int N>
|
|
static void test_Nf(skiatest::Reporter* r) {
|
|
|
|
auto assert_nearly_eq = [&](float eps, const SkNx<N, float>& v,
|
|
float a, float b, float c, float d) {
|
|
auto close = [=](float a, float b) { return fabsf(a-b) <= eps; };
|
|
float vals[4];
|
|
v.store(vals);
|
|
bool ok = close(vals[0], a) && close(vals[1], b)
|
|
&& close( v[0], a) && close( v[1], b);
|
|
REPORTER_ASSERT(r, ok);
|
|
if (N == 4) {
|
|
ok = close(vals[2], c) && close(vals[3], d)
|
|
&& close( v[2], c) && close( v[3], d);
|
|
REPORTER_ASSERT(r, ok);
|
|
}
|
|
};
|
|
auto assert_eq = [&](const SkNx<N, float>& v, float a, float b, float c, float d) {
|
|
return assert_nearly_eq(0, v, a,b,c,d);
|
|
};
|
|
|
|
float vals[] = {3, 4, 5, 6};
|
|
SkNx<N,float> a = SkNx<N,float>::Load(vals),
|
|
b(a),
|
|
c = a;
|
|
SkNx<N,float> d;
|
|
d = a;
|
|
|
|
assert_eq(a, 3, 4, 5, 6);
|
|
assert_eq(b, 3, 4, 5, 6);
|
|
assert_eq(c, 3, 4, 5, 6);
|
|
assert_eq(d, 3, 4, 5, 6);
|
|
|
|
assert_eq(a+b, 6, 8, 10, 12);
|
|
assert_eq(a*b, 9, 16, 25, 36);
|
|
assert_eq(a*b-b, 6, 12, 20, 30);
|
|
assert_eq((a*b).sqrt(), 3, 4, 5, 6);
|
|
assert_eq(a/b, 1, 1, 1, 1);
|
|
assert_eq(SkNx<N,float>(0)-a, -3, -4, -5, -6);
|
|
|
|
SkNx<N,float> fours(4);
|
|
|
|
assert_eq(fours.sqrt(), 2,2,2,2);
|
|
assert_nearly_eq(0.001f, fours.rsqrt(), 0.5, 0.5, 0.5, 0.5);
|
|
|
|
assert_nearly_eq(0.001f, fours.invert(), 0.25, 0.25, 0.25, 0.25);
|
|
|
|
assert_eq(SkNx<N,float>::Min(a, fours), 3, 4, 4, 4);
|
|
assert_eq(SkNx<N,float>::Max(a, fours), 4, 4, 5, 6);
|
|
|
|
// Test some comparisons. This is not exhaustive.
|
|
REPORTER_ASSERT(r, (a == b).allTrue());
|
|
REPORTER_ASSERT(r, (a+b == a*b-b).anyTrue());
|
|
REPORTER_ASSERT(r, !(a+b == a*b-b).allTrue());
|
|
REPORTER_ASSERT(r, !(a+b == a*b).anyTrue());
|
|
REPORTER_ASSERT(r, !(a != b).anyTrue());
|
|
REPORTER_ASSERT(r, (a < fours).anyTrue());
|
|
REPORTER_ASSERT(r, (a <= fours).anyTrue());
|
|
REPORTER_ASSERT(r, !(a > fours).allTrue());
|
|
REPORTER_ASSERT(r, !(a >= fours).allTrue());
|
|
}
|
|
|
|
// Runs the float-vector checks at both widths covered by test_Nf.
DEF_TEST(SkNf, r) {
    test_Nf<2>(r);  // two lanes
    test_Nf<4>(r);  // four lanes
}
|
|
|
|
template <int N, typename T>
|
|
void test_Ni(skiatest::Reporter* r) {
|
|
auto assert_eq = [&](const SkNx<N,T>& v, T a, T b, T c, T d, T e, T f, T g, T h) {
|
|
T vals[8];
|
|
v.store(vals);
|
|
|
|
switch (N) {
|
|
case 8: REPORTER_ASSERT(r, vals[4] == e && vals[5] == f && vals[6] == g && vals[7] == h);
|
|
case 4: REPORTER_ASSERT(r, vals[2] == c && vals[3] == d);
|
|
case 2: REPORTER_ASSERT(r, vals[0] == a && vals[1] == b);
|
|
}
|
|
switch (N) {
|
|
case 8: REPORTER_ASSERT(r, v[4] == e && v[5] == f &&
|
|
v[6] == g && v[7] == h);
|
|
case 4: REPORTER_ASSERT(r, v[2] == c && v[3] == d);
|
|
case 2: REPORTER_ASSERT(r, v[0] == a && v[1] == b);
|
|
}
|
|
};
|
|
|
|
T vals[] = { 1,2,3,4,5,6,7,8 };
|
|
SkNx<N,T> a = SkNx<N,T>::Load(vals),
|
|
b(a),
|
|
c = a;
|
|
SkNx<N,T> d;
|
|
d = a;
|
|
|
|
assert_eq(a, 1,2,3,4,5,6,7,8);
|
|
assert_eq(b, 1,2,3,4,5,6,7,8);
|
|
assert_eq(c, 1,2,3,4,5,6,7,8);
|
|
assert_eq(d, 1,2,3,4,5,6,7,8);
|
|
|
|
assert_eq(a+a, 2,4,6,8,10,12,14,16);
|
|
assert_eq(a*a, 1,4,9,16,25,36,49,64);
|
|
assert_eq(a*a-a, 0,2,6,12,20,30,42,56);
|
|
|
|
assert_eq(a >> 2, 0,0,0,1,1,1,1,2);
|
|
assert_eq(a << 1, 2,4,6,8,10,12,14,16);
|
|
|
|
REPORTER_ASSERT(r, a[1] == 2);
|
|
}
|
|
|
|
// Runs the integer-vector checks for two lane types at each of the three supported widths.
DEF_TEST(SkNx, r) {
    // 16-bit unsigned lanes.
    test_Ni<2, uint16_t>(r);
    test_Ni<4, uint16_t>(r);
    test_Ni<8, uint16_t>(r);

    // int lanes.
    test_Ni<2, int>(r);
    test_Ni<4, int>(r);
    test_Ni<8, int>(r);
}
|
|
|
|
// Compares Sk16b::Min / operator< and Sk16h::Min (lane 0 only) against scalar SkTMin / <.
DEF_TEST(SkNi_min_lt, r) {
    // Exhaustively check the 8x8 bit space.
    for (int a = 0; a < (1<<8); a++) {
        for (int b = 0; b < (1<<8); b++) {
            Sk16b aw(a);
            Sk16b bw(b);
            REPORTER_ASSERT(r, Sk16b::Min(aw, bw)[0] == SkTMin(a, b));
            // Both sides are negated so that only truthiness is compared, not the exact
            // value stored in the lane.
            REPORTER_ASSERT(r, !(aw < bw)[0] == !(a < b));
        }
    }

    // Exhausting the 16x16 bit space is kind of slow, so only do that in release builds.
#ifdef SK_DEBUG
    // Debug builds: check 1<<16 randomly drawn pairs instead of every pair.
    SkRandom rand;
    for (int i = 0; i < (1<<16); i++) {
        uint16_t a = rand.nextU() >> 16;
        uint16_t b = rand.nextU() >> 16;
        REPORTER_ASSERT(r, Sk16h::Min(Sk16h(a), Sk16h(b))[0] == SkTMin(a, b));
    }
#else
    // Release builds: check every pair of 16-bit values.
    for (int a = 0; a < (1<<16); a++) {
        for (int b = 0; b < (1<<16); b++) {
            REPORTER_ASSERT(r, Sk16h::Min(Sk16h(a), Sk16h(b))[0] == SkTMin(a, b));
        }
    }
#endif
}
|
|
|
|
// Checks Sk16b::saturatedAdd (lane 0) against a scalar reference for every pair of 8-bit values.
DEF_TEST(SkNi_saturatedAdd, r) {
    for (int a = 0; a < (1<<8); a++) {
        for (int b = 0; b < (1<<8); b++) {
            // Reference result: the true sum, clamped to the largest 8-bit value.  Both
            // operands are non-negative, so the sum can never need clamping from below.
            const int sum = a + b;
            const int exact = sum > 255 ? 255 : sum;

            REPORTER_ASSERT(r, Sk16b(a).saturatedAdd(Sk16b(b))[0] == exact);
        }
    }
}
|
|
|
|
// Checks the two Sk4px ways of computing a*b/255 (lane 0) against the rounded scalar result,
// for every pair of 8-bit values.
DEF_TEST(Sk4px_muldiv255round, r) {
    for (int a = 0; a < (1<<8); a++) {
        for (int b = 0; b < (1<<8); b++) {
            // Scalar reference for a*b/255.
            int exact = (a*b+127)/255;

            // Duplicate a and b 16x each.
            auto av = Sk4px::DupAlpha(a);
            auto bv = Sk4px::DupAlpha(b);

            // This way should always be exactly correct.
            int correct = (av * bv).div255()[0];
            REPORTER_ASSERT(r, correct == exact);

            // We're a bit more flexible on this method: correct for 0 or 255, otherwise off by <=1.
            int fast = av.approxMulDiv255(bv)[0];
            REPORTER_ASSERT(r, fast-exact >= -1 && fast-exact <= 1);

            const bool aIsEndpoint = (a == 0 || a == 255);
            const bool bIsEndpoint = (b == 0 || b == 255);
            if (aIsEndpoint || bIsEndpoint) {
                REPORTER_ASSERT(r, fast == exact);
            }
        }
    }
}
|
|
|
|
// Checks that widenLoHi() produces the same bytes as widenLo() + widenHi().
DEF_TEST(Sk4px_widening, r) {
    SkPMColor colors[] = {
        SkPreMultiplyColor(0xff00ff00),
        SkPreMultiplyColor(0x40008000),
        SkPreMultiplyColor(0x7f020406),
        SkPreMultiplyColor(0x00000000),
    };
    auto packed = Sk4px::Load4(colors);

    auto wideLo   = packed.widenLo();
    auto wideHi   = packed.widenHi();
    auto wideLoHi = packed.widenLoHi();

    // The same value built from the two separate widenings.
    auto wideLoHiAlt = wideLo + wideHi;

    REPORTER_ASSERT(r, 0 == memcmp(&wideLoHi, &wideLoHiAlt, sizeof(wideLoHi)));
}
|
|
|
|
// Checks Sk4f::abs() on positive zero, negative zero, a positive and a negative value.
DEF_TEST(SkNx_abs, r) {
    Sk4f input(0.0f, -0.0f, 2.0f, -4.0f);
    auto fs = input.abs();

    REPORTER_ASSERT(r, fs[0] == 0.0f);
    REPORTER_ASSERT(r, fs[1] == 0.0f);
    REPORTER_ASSERT(r, fs[2] == 2.0f);
    REPORTER_ASSERT(r, fs[3] == 4.0f);
}
|
|
|
|
// Checks Sk4f::floor() on small positive and negative fractions: positives are expected to
// become 0 and negatives -1.
DEF_TEST(SkNx_floor, r) {
    Sk4f input(0.4f, -0.4f, 0.6f, -0.6f);
    auto fs = input.floor();

    REPORTER_ASSERT(r, fs[0] ==  0.0f);
    REPORTER_ASSERT(r, fs[1] == -1.0f);
    REPORTER_ASSERT(r, fs[2] ==  0.0f);
    REPORTER_ASSERT(r, fs[3] == -1.0f);
}
|
|
|
|
// Checks SkNx_shuffle both when narrowing (4 lanes -> 2) and when widening (2 lanes -> 4).
DEF_TEST(SkNx_shuffle, r) {
    Sk4f f4(0,10,20,30);

    // Pick source lanes 2 and 1, in that order.
    Sk2f f2 = SkNx_shuffle<2,1>(f4);
    REPORTER_ASSERT(r, f2[0] == 20);
    REPORTER_ASSERT(r, f2[1] == 10);

    // Expand back to four lanes by repeating the two source lanes as 0,1,1,0.
    f4 = SkNx_shuffle<0,1,1,0>(f2);
    REPORTER_ASSERT(r, f4[0] == 20);
    REPORTER_ASSERT(r, f4[1] == 10);
    REPORTER_ASSERT(r, f4[2] == 10);
    REPORTER_ASSERT(r, f4[3] == 20);
}
|
|
|
|
// Checks SkNx_cast between Sk4f and Sk4i in both directions.
DEF_TEST(SkNx_int_float, r) {
    Sk4f f(-2.3f, 1.0f, 0.45f, 0.6f);

    // float -> int: the expected values are the inputs with their fractional part dropped.
    Sk4i i = SkNx_cast<int>(f);
    REPORTER_ASSERT(r, i[0] == -2);
    REPORTER_ASSERT(r, i[1] ==  1);
    REPORTER_ASSERT(r, i[2] ==  0);
    REPORTER_ASSERT(r, i[3] ==  0);

    // int -> float: converting those integers back must be exact.
    f = SkNx_cast<float>(i);
    REPORTER_ASSERT(r, f[0] == -2.0f);
    REPORTER_ASSERT(r, f[1] ==  1.0f);
    REPORTER_ASSERT(r, f[2] ==  0.0f);
    REPORTER_ASSERT(r, f[3] ==  0.0f);
}
|
|
|
|
#include "SkRandom.h"
|
|
|
|
// Checks SkNx_cast between Sk4h (uint16_t lanes) and Sk4f in both directions, then verifies
// that random 16-bit values survive a u16 -> float -> u16 round trip unchanged.
DEF_TEST(SkNx_u16_float, r) {
    {
        // u16 --> float
        auto h4 = Sk4h(15, 17, 257, 65535);
        auto f4 = SkNx_cast<float>(h4);
        REPORTER_ASSERT(r, f4[0] == 15.0f);
        REPORTER_ASSERT(r, f4[1] == 17.0f);
        REPORTER_ASSERT(r, f4[2] == 257.0f);
        REPORTER_ASSERT(r, f4[3] == 65535.0f);
    }
    {
        // float -> u16
        auto f4 = Sk4f(15, 17, 257, 65535);
        auto h4 = SkNx_cast<uint16_t>(f4);
        REPORTER_ASSERT(r, h4[0] == 15);
        REPORTER_ASSERT(r, h4[1] == 17);
        REPORTER_ASSERT(r, h4[2] == 257);
        REPORTER_ASSERT(r, h4[3] == 65535);
    }

    // Starting with any u16 value, we should be able to have a perfect round-trip in/out of
    // floats.
    SkRandom rand;
    for (int trial = 0; trial < 10000; ++trial) {
        const uint16_t s16[4] = {
            static_cast<uint16_t>(rand.nextU16()), static_cast<uint16_t>(rand.nextU16()),
            static_cast<uint16_t>(rand.nextU16()), static_cast<uint16_t>(rand.nextU16()),
        };

        auto original   = Sk4h::Load(s16);
        auto asFloats   = SkNx_cast<float>(original);
        auto roundTrip  = SkNx_cast<uint16_t>(asFloats);

        uint16_t d16[4];
        roundTrip.store(d16);
        REPORTER_ASSERT(r, !memcmp(s16, d16, sizeof(s16)));
    }
}
|
|
|
|
// The SSE2 implementation of SkNx_cast<uint16_t>(Sk4i) is non-trivial, so worth a test.
|
|
DEF_TEST(SkNx_int_u16, r) {
|
|
// These are pretty hard to get wrong.
|
|
for (int i = 0; i <= 0x7fff; i++) {
|
|
uint16_t expected = (uint16_t)i;
|
|
uint16_t actual = SkNx_cast<uint16_t>(Sk4i(i))[0];
|
|
|
|
REPORTER_ASSERT(r, expected == actual);
|
|
}
|
|
|
|
// A naive implementation with _mm_packs_epi32 would succeed up to 0x7fff but fail here:
|
|
for (int i = 0x8000; (1) && i <= 0xffff; i++) {
|
|
uint16_t expected = (uint16_t)i;
|
|
uint16_t actual = SkNx_cast<uint16_t>(Sk4i(i))[0];
|
|
|
|
REPORTER_ASSERT(r, expected == actual);
|
|
}
|
|
}
|
|
|
|
// Checks that Sk4f::Load4 splits 16 consecutive floats into four vectors by taking every
// fourth element, and that Sk4f::Store4 writes them back in the original order.
DEF_TEST(SkNx_4fLoad4Store4, r) {
    float src[] = {
         0.0f,  1.0f,  2.0f,  3.0f,
         4.0f,  5.0f,  6.0f,  7.0f,
         8.0f,  9.0f, 10.0f, 11.0f,
        12.0f, 13.0f, 14.0f, 15.0f
    };

    Sk4f a, b, c, d;
    Sk4f::Load4(src, &a, &b, &c, &d);

    // a receives elements 0, 4, 8, 12 of src.
    REPORTER_ASSERT(r,  0.0f == a[0]);
    REPORTER_ASSERT(r,  4.0f == a[1]);
    REPORTER_ASSERT(r,  8.0f == a[2]);
    REPORTER_ASSERT(r, 12.0f == a[3]);

    // b receives elements 1, 5, 9, 13.
    REPORTER_ASSERT(r,  1.0f == b[0]);
    REPORTER_ASSERT(r,  5.0f == b[1]);
    REPORTER_ASSERT(r,  9.0f == b[2]);
    REPORTER_ASSERT(r, 13.0f == b[3]);

    // c receives elements 2, 6, 10, 14.
    REPORTER_ASSERT(r,  2.0f == c[0]);
    REPORTER_ASSERT(r,  6.0f == c[1]);
    REPORTER_ASSERT(r, 10.0f == c[2]);
    REPORTER_ASSERT(r, 14.0f == c[3]);

    // d receives elements 3, 7, 11, 15.
    REPORTER_ASSERT(r,  3.0f == d[0]);
    REPORTER_ASSERT(r,  7.0f == d[1]);
    REPORTER_ASSERT(r, 11.0f == d[2]);
    REPORTER_ASSERT(r, 15.0f == d[3]);

    // Storing the four vectors must reproduce the source array byte for byte.
    float dst[16];
    Sk4f::Store4(dst, a, b, c, d);
    REPORTER_ASSERT(r, 0 == memcmp(dst, src, 16 * sizeof(float)));
}
|