Update Sk4px to use skvx instead of SkNx

Adds a saturated_add function that was on SkNx and used in
SkXfermode_opts, but hadn't been ported to skvx yet.

Removes the Sk4px_opts variants and simplifies some of its functions;
many were already defined in skvx.

The largest change is that Sk4px does not extend skvx::byte16, since it
used to extend Sk16b. Now it just has a vector as a data type. This
was necessary so that we could define operators that were typed for
Sk4px and Wide w/o conflicting with the free operators that were
defined for the base skvx types.

Change-Id: I8c667ba86f662ccf07ad85aa32e78abfc0a8c7ae
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/542645
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
This commit is contained in:
Michael Ludwig 2022-05-20 15:55:12 -04:00 committed by SkCQ
parent 3149a7b283
commit 767586b330
13 changed files with 93 additions and 305 deletions

View File

@ -749,6 +749,31 @@ SIN Vec<N,uint8_t> approx_scale(const Vec<N,uint8_t>& x, const Vec<N,uint8_t>& y
return cast<uint8_t>( (X*Y+X)/256 );
}
// saturated_add(x,y) sums values and clamps to the maximum value instead of overflowing.
SINT std::enable_if_t<std::is_unsigned_v<T>, Vec<N,T>> saturated_add(const Vec<N,T>& x,
const Vec<N,T>& y) {
#if SKVX_USE_SIMD && (defined(__SSE__) || defined(__ARM_NEON))
// Both SSE and ARM have 16-lane saturated adds, so use intrinsics for those and recurse down
// or join up to take advantage.
if constexpr (N == 16 && sizeof(T) == 1) {
#if defined(__SSE__)
return unchecked_bit_pun<Vec<N,T>>(_mm_adds_epu8(unchecked_bit_pun<__m128i>(x),
unchecked_bit_pun<__m128i>(y)));
#else // __ARM_NEON
return unchecked_bit_pun<Vec<N,T>>(vqaddq_u8(unchecked_bit_pun<uint8x16_t>(x),
unchecked_bit_pun<uint8x16_t>(y)));
#endif
} else if constexpr (N < 16 && sizeof(T) == 1) {
return saturated_add(join(x,x), join(y,y)).lo;
} else if constexpr (sizeof(T) == 1) {
return join(saturated_add(x.lo, y.lo), saturated_add(x.hi, y.hi));
}
#endif
// Otherwise saturate manually
auto sum = x + y;
return if_then_else(sum < x, Vec<N,T>(std::numeric_limits<T>::max()), sum);
}
// The ScaledDividerU32 takes a divisor > 1, and creates a function divide(numerator) that
// calculates a numerator / denominator. For this to be rounded properly, numerator should have
// half added in:

View File

@ -240,10 +240,7 @@ generated_cc_atom(
deps = [
"//include/core:SkColor_hdr",
"//include/private:SkColorData_hdr",
"//include/private:SkNx_hdr",
"//src/opts:Sk4px_NEON_hdr",
"//src/opts:Sk4px_SSE2_hdr",
"//src/opts:Sk4px_none_hdr",
"//include/private:SkVx_hdr",
],
)

View File

@ -10,30 +10,27 @@
#include "include/core/SkColor.h"
#include "include/private/SkColorData.h"
#include "include/private/SkNx.h"
// This file may be included multiple times by .cpp files with different flags, leading
// to different definitions. Usually that doesn't matter because it's all inlined, but
// in Debug modes the compilers may not inline everything. So wrap everything in an
// anonymous namespace to give each includer their own silo of this code (or the linker
// will probably pick one randomly for us, which is rarely correct).
namespace { // NOLINT(google-build-namespaces)
#include "include/private/SkVx.h"
// 1, 2 or 4 SkPMColors, generally vectorized.
class Sk4px : public Sk16b {
class Sk4px {
public:
Sk4px(const Sk16b& v) : INHERITED(v) {}
Sk4px(const skvx::byte16& v) : fV(v) {}
static Sk4px DupPMColor(SkPMColor c) {
Sk4u splat(c);
skvx::uint4 splat(c);
Sk4px v;
memcpy((void*)&v, &splat, 16);
return v;
}
Sk4px alphas() const; // ARGB argb XYZW xyzw -> AAAA aaaa XXXX xxxx
Sk4px inv() const { return Sk16b(255) - *this; }
// RGBA rgba XYZW xyzw -> AAAA aaaa WWWW wwww
Sk4px alphas() const {
static_assert(SK_A32_SHIFT == 24, "This method assumes little-endian.");
return Sk4px(skvx::shuffle<3,3,3,3, 7,7,7,7, 11,11,11,11, 15,15,15,15>(fV));
}
Sk4px inv() const { return Sk4px(skvx::byte16(255) - fV); }
// When loading or storing fewer than 4 SkPMColors, we use the low lanes.
static Sk4px Load4(const SkPMColor px[4]) {
@ -53,8 +50,16 @@ public:
}
// Ditto for Alphas... Load2Alphas fills the low two lanes of Sk4px.
static Sk4px Load4Alphas(const SkAlpha[4]); // AaXx -> AAAA aaaa XXXX xxxx
static Sk4px Load2Alphas(const SkAlpha[2]); // Aa -> AAAA aaaa ???? ????
// AaXx -> AAAA aaaa XXXX xxxx
static Sk4px Load4Alphas(const SkAlpha alphas[4]) {
skvx::byte4 a = skvx::byte4::Load(alphas);
return Sk4px(skvx::shuffle<0,0,0,0, 1,1,1,1, 2,2,2,2, 3,3,3,3>(a));
}
// Aa -> AAAA aaaa ???? ????
static Sk4px Load2Alphas(const SkAlpha alphas[2]) {
skvx::byte2 a = skvx::byte2::Load(alphas);
return Sk4px(join(skvx::shuffle<0,0,0,0, 1,1,1,1>(a), skvx::byte8()));
}
void store4(SkPMColor px[4]) const { memcpy(px, this, 16); }
void store2(SkPMColor px[2]) const { memcpy(px, this, 8); }
@ -62,45 +67,47 @@ public:
// 1, 2, or 4 SkPMColors with 16-bit components.
// This is most useful as the result of a multiply, e.g. from mulWiden().
class Wide : public Sk16h {
class Wide {
public:
Wide(const Sk16h& v) : Sk16h(v) {}
// Add, then pack the top byte of each component back down into 4 SkPMColors.
Sk4px addNarrowHi(const Sk16h&) const;
Wide(const skvx::Vec<16, uint16_t>& v) : fV(v) {}
// Rounds, i.e. (x+127) / 255.
Sk4px div255() const;
Sk4px div255() const { return Sk4px(skvx::div255(fV)); }
// These just keep the types as Wide so the user doesn't have to keep casting.
Wide operator * (const Wide& o) const { return INHERITED::operator*(o); }
Wide operator + (const Wide& o) const { return INHERITED::operator+(o); }
Wide operator - (const Wide& o) const { return INHERITED::operator-(o); }
Wide operator >> (int bits) const { return INHERITED::operator>>(bits); }
Wide operator << (int bits) const { return INHERITED::operator<<(bits); }
Wide operator * (const Wide& o) const { return Wide(fV * o.fV); }
Wide operator + (const Wide& o) const { return Wide(fV + o.fV); }
Wide operator - (const Wide& o) const { return Wide(fV - o.fV); }
Wide operator >> (int bits) const { return Wide(fV >> bits); }
Wide operator << (int bits) const { return Wide(fV << bits); }
private:
using INHERITED = Sk16h;
skvx::Vec<16, uint16_t> fV;
};
Wide widen() const; // Widen 8-bit values to low 8-bits of 16-bit lanes.
Wide mulWiden(const Sk16b&) const; // 8-bit x 8-bit -> 16-bit components.
// Widen 8-bit values to low 8-bits of 16-bit lanes.
Wide widen() const { return Wide(skvx::cast<uint16_t>(fV)); }
// 8-bit x 8-bit -> 16-bit components.
Wide mulWiden(const skvx::byte16& o) const { return Wide(mull(fV, o)); }
// The only 8-bit multiply we use is 8-bit x 8-bit -> 16-bit. Might as well make it pithy.
Wide operator * (const Sk4px& o) const { return this->mulWiden(o); }
Wide operator * (const Sk4px& o) const { return this->mulWiden(o.fV); }
// These just keep the types as Sk4px so the user doesn't have to keep casting.
Sk4px operator + (const Sk4px& o) const { return INHERITED::operator+(o); }
Sk4px operator - (const Sk4px& o) const { return INHERITED::operator-(o); }
Sk4px operator < (const Sk4px& o) const { return INHERITED::operator<(o); }
Sk4px thenElse(const Sk4px& t, const Sk4px& e) const { return INHERITED::thenElse(t,e); }
Sk4px operator + (const Sk4px& o) const { return Sk4px(fV + o.fV); }
Sk4px operator - (const Sk4px& o) const { return Sk4px(fV - o.fV); }
Sk4px operator < (const Sk4px& o) const { return Sk4px(fV < o.fV); }
Sk4px operator & (const Sk4px& o) const { return Sk4px(fV & o.fV); }
Sk4px thenElse(const Sk4px& t, const Sk4px& e) const {
return Sk4px(if_then_else(fV, t.fV, e.fV));
}
// Generally faster than (*this * o).div255().
// May be incorrect by +-1, but is always exactly correct when *this or o is 0 or 255.
Sk4px approxMulDiv255(const Sk16b& o) const {
// (x*y + x) / 256 meets these criteria. (As of course does (x*y + y) / 256 by symmetry.)
// FYI: (x*y + 255) / 256 also meets these criteria. In my brief testing, it was slower.
return this->widen().addNarrowHi(*this * o);
Sk4px approxMulDiv255(const Sk4px& o) const {
return Sk4px(approx_scale(fV, o.fV));
}
Sk4px saturatedAdd(const Sk4px& o) const {
return Sk4px(saturated_add(fV, o.fV));
}
// A generic driver that maps fn over a src array into a dst array.
@ -192,7 +199,7 @@ public:
dst += 2; a += 2; n -= 2;
}
if (n >= 1) {
fn(Load1(dst), Sk16b(*a)).store1(dst);
fn(Load1(dst), skvx::byte16(*a)).store1(dst);
}
break;
}
@ -224,7 +231,7 @@ public:
dst += 2; src += 2; a += 2; n -= 2;
}
if (n >= 1) {
fn(Load1(dst), Load1(src), Sk16b(*a)).store1(dst);
fn(Load1(dst), Load1(src), skvx::byte16(*a)).store1(dst);
}
break;
}
@ -233,24 +240,10 @@ public:
private:
Sk4px() = default;
using INHERITED = Sk16b;
skvx::byte16 fV;
};
static_assert(sizeof(Sk4px) == sizeof(Sk16b));
static_assert(sizeof(Sk4px) == 16);
static_assert(sizeof(Sk4px) == sizeof(skvx::byte16));
static_assert(alignof(Sk4px) == alignof(skvx::byte16));
} // namespace
#ifdef SKNX_NO_SIMD
#include "src/opts/Sk4px_none.h"
#else
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
#include "src/opts/Sk4px_SSE2.h"
#elif defined(SK_ARM_HAS_NEON)
#include "src/opts/Sk4px_NEON.h"
#else
#include "src/opts/Sk4px_none.h"
#endif
#endif
#endif//Sk4px_DEFINED
#endif // Sk4px_DEFINED

View File

@ -20,25 +20,6 @@ cc_library(
],
)
generated_cc_atom(
name = "Sk4px_NEON_hdr",
hdrs = ["Sk4px_NEON.h"],
visibility = ["//:__subpackages__"],
)
generated_cc_atom(
name = "Sk4px_SSE2_hdr",
hdrs = ["Sk4px_SSE2.h"],
visibility = ["//:__subpackages__"],
)
generated_cc_atom(
name = "Sk4px_none_hdr",
hdrs = ["Sk4px_none.h"],
visibility = ["//:__subpackages__"],
deps = ["//src/core:SkUtils_hdr"],
)
generated_cc_atom(
name = "SkBitmapProcState_opts_hdr",
hdrs = ["SkBitmapProcState_opts.h"],
@ -198,7 +179,6 @@ generated_cc_atom(
hdrs = ["SkXfermode_opts.h"],
visibility = ["//:__subpackages__"],
deps = [
"//include/private:SkNx_hdr",
"//src/core:Sk4px_hdr",
"//src/core:SkMSAN_hdr",
"//src/core:SkXfermodePriv_hdr",

View File

@ -1,56 +0,0 @@
/*
* Copyright 2015 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
namespace { // NOLINT(google-build-namespaces)
inline Sk4px::Wide Sk4px::widen() const {
return Sk16h(vmovl_u8(vget_low_u8 (this->fVec)),
vmovl_u8(vget_high_u8(this->fVec)));
}
inline Sk4px::Wide Sk4px::mulWiden(const Sk16b& other) const {
return Sk16h(vmull_u8(vget_low_u8 (this->fVec), vget_low_u8 (other.fVec)),
vmull_u8(vget_high_u8(this->fVec), vget_high_u8(other.fVec)));
}
inline Sk4px Sk4px::Wide::addNarrowHi(const Sk16h& other) const {
const Sk4px::Wide o(other); // Should be no code, but allows us to access fLo, fHi.
return Sk16b(vcombine_u8(vaddhn_u16(this->fLo.fVec, o.fLo.fVec),
vaddhn_u16(this->fHi.fVec, o.fHi.fVec)));
}
inline Sk4px Sk4px::Wide::div255() const {
// Calculated as (x + (x+128)>>8 +128) >> 8. The 'r' in each instruction provides each +128.
return Sk16b(vcombine_u8(vraddhn_u16(this->fLo.fVec, vrshrq_n_u16(this->fLo.fVec, 8)),
vraddhn_u16(this->fHi.fVec, vrshrq_n_u16(this->fHi.fVec, 8))));
}
inline Sk4px Sk4px::alphas() const {
auto as = vshrq_n_u32((uint32x4_t)fVec, SK_A32_SHIFT); // ___3 ___2 ___1 ___0
return Sk16b((uint8x16_t)vmulq_n_u32(as, 0x01010101)); // 3333 2222 1111 0000
}
inline Sk4px Sk4px::Load4Alphas(const SkAlpha a[4]) {
uint8x16_t a8 = vdupq_n_u8(0); // ____ ____ ____ ____
a8 = vld1q_lane_u8(a+0, a8, 0); // ____ ____ ____ ___0
a8 = vld1q_lane_u8(a+1, a8, 4); // ____ ____ ___1 ___0
a8 = vld1q_lane_u8(a+2, a8, 8); // ____ ___2 ___1 ___0
a8 = vld1q_lane_u8(a+3, a8, 12); // ___3 ___2 ___1 ___0
auto a32 = (uint32x4_t)a8; //
return Sk16b((uint8x16_t)vmulq_n_u32(a32, 0x01010101)); // 3333 2222 1111 0000
}
inline Sk4px Sk4px::Load2Alphas(const SkAlpha a[2]) {
uint8x16_t a8 = vdupq_n_u8(0); // ____ ____ ____ ____
a8 = vld1q_lane_u8(a+0, a8, 0); // ____ ____ ____ ___0
a8 = vld1q_lane_u8(a+1, a8, 4); // ____ ____ ___1 ___0
auto a32 = (uint32x4_t)a8; //
return Sk16b((uint8x16_t)vmulq_n_u32(a32, 0x01010101)); // ____ ____ 1111 0000
}
} // namespace

View File

@ -1,76 +0,0 @@
/*
* Copyright 2015 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
namespace { // NOLINT(google-build-namespaces)
inline Sk4px::Wide Sk4px::widen() const {
return Sk16h(_mm_unpacklo_epi8(this->fVec, _mm_setzero_si128()),
_mm_unpackhi_epi8(this->fVec, _mm_setzero_si128()));
}
inline Sk4px::Wide Sk4px::mulWiden(const Sk16b& other) const {
return this->widen() * Sk4px(other).widen();
}
inline Sk4px Sk4px::Wide::addNarrowHi(const Sk16h& other) const {
Sk4px::Wide r = (*this + other) >> 8;
return Sk4px(_mm_packus_epi16(r.fLo.fVec, r.fHi.fVec));
}
inline Sk4px Sk4px::Wide::div255() const {
// (x + 127) / 255 == ((x+128) * 257)>>16,
// and _mm_mulhi_epu16 makes the (_ * 257)>>16 part very convenient.
const __m128i _128 = _mm_set1_epi16(128),
_257 = _mm_set1_epi16(257);
return Sk4px(_mm_packus_epi16(_mm_mulhi_epu16(_mm_add_epi16(fLo.fVec, _128), _257),
_mm_mulhi_epu16(_mm_add_epi16(fHi.fVec, _128), _257)));
}
// Load4Alphas and Load2Alphas use possibly-unaligned loads (SkAlpha[] -> uint16_t or uint32_t).
// These are safe on x86, often with no speed penalty.
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
inline Sk4px Sk4px::alphas() const {
static_assert(SK_A32_SHIFT == 24, "Intel's always little-endian.");
__m128i splat = _mm_set_epi8(15,15,15,15, 11,11,11,11, 7,7,7,7, 3,3,3,3);
return Sk16b(_mm_shuffle_epi8(this->fVec, splat));
}
inline Sk4px Sk4px::Load4Alphas(const SkAlpha a[4]) {
uint32_t as;
memcpy(&as, a, 4);
__m128i splat = _mm_set_epi8(3,3,3,3, 2,2,2,2, 1,1,1,1, 0,0,0,0);
return Sk16b(_mm_shuffle_epi8(_mm_cvtsi32_si128(as), splat));
}
#else
inline Sk4px Sk4px::alphas() const {
static_assert(SK_A32_SHIFT == 24, "Intel's always little-endian.");
// We exploit that A >= rgb for any premul pixel.
__m128i as = fVec; // 3xxx 2xxx 1xxx 0xxx
as = _mm_max_epu8(as, _mm_srli_epi32(as, 8)); // 33xx 22xx 11xx 00xx
as = _mm_max_epu8(as, _mm_srli_epi32(as, 16)); // 3333 2222 1111 0000
return Sk16b(as);
}
inline Sk4px Sk4px::Load4Alphas(const SkAlpha a[4]) {
__m128i as;
memcpy(&as, a, 4); // ____ ____ ____ 3210
as = _mm_unpacklo_epi8 (as, as); // ____ ____ 3322 1100
as = _mm_unpacklo_epi16(as, as); // 3333 2222 1111 0000
return Sk16b(as);
}
#endif
inline Sk4px Sk4px::Load2Alphas(const SkAlpha a[2]) {
uint16_t alphas;
memcpy(&alphas, a, 2);
uint32_t alphas_and_two_zeros = alphas; // Aa -> Aa00
return Load4Alphas((const SkAlpha*)&alphas_and_two_zeros);
}
} // namespace

View File

@ -1,59 +0,0 @@
/*
* Copyright 2015 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "src/core/SkUtils.h"
namespace { // NOLINT(google-build-namespaces)
inline Sk4px::Wide Sk4px::widen() const {
return Sk16h((*this)[ 0], (*this)[ 1], (*this)[ 2], (*this)[ 3],
(*this)[ 4], (*this)[ 5], (*this)[ 6], (*this)[ 7],
(*this)[ 8], (*this)[ 9], (*this)[10], (*this)[11],
(*this)[12], (*this)[13], (*this)[14], (*this)[15]);
}
inline Sk4px::Wide Sk4px::mulWiden(const Sk16b& other) const {
return this->widen() * Sk4px(other).widen();
}
inline Sk4px Sk4px::Wide::addNarrowHi(const Sk16h& other) const {
Sk4px::Wide r = (*this + other) >> 8;
return Sk16b(r[ 0], r[ 1], r[ 2], r[ 3],
r[ 4], r[ 5], r[ 6], r[ 7],
r[ 8], r[ 9], r[10], r[11],
r[12], r[13], r[14], r[15]);
}
inline Sk4px Sk4px::Wide::div255() const {
// Calculated as ((x+128) + ((x+128)>>8)) >> 8.
auto v = *this + Sk16h(128);
return v.addNarrowHi(v>>8);
}
inline Sk4px Sk4px::alphas() const {
static_assert(SK_A32_SHIFT == 24, "This method assumes little-endian.");
return Sk16b((*this)[ 3], (*this)[ 3], (*this)[ 3], (*this)[ 3],
(*this)[ 7], (*this)[ 7], (*this)[ 7], (*this)[ 7],
(*this)[11], (*this)[11], (*this)[11], (*this)[11],
(*this)[15], (*this)[15], (*this)[15], (*this)[15]);
}
inline Sk4px Sk4px::Load4Alphas(const SkAlpha a[4]) {
return Sk16b(a[0], a[0], a[0], a[0],
a[1], a[1], a[1], a[1],
a[2], a[2], a[2], a[2],
a[3], a[3], a[3], a[3]);
}
inline Sk4px Sk4px::Load2Alphas(const SkAlpha a[2]) {
return Sk16b(a[0], a[0], a[0], a[0],
a[1], a[1], a[1], a[1],
0,0,0,0,
0,0,0,0);
}
} // namespace

View File

@ -205,7 +205,7 @@ namespace SK_OPTS_NS {
// ~~~>
// a = 1*aa + d(1-1*aa) = aa + d(1-aa)
// c = 0*aa + d(1-1*aa) = d(1-aa)
return Sk4px(Sk16b(aa) & Sk16b(0,0,0,255, 0,0,0,255, 0,0,0,255, 0,0,0,255))
return (aa & Sk4px(skvx::byte16{0,0,0,255, 0,0,0,255, 0,0,0,255, 0,0,0,255}))
+ d.approxMulDiv255(aa.inv());
};
while (h --> 0) {

View File

@ -8,7 +8,6 @@
#ifndef Sk4pxXfermode_DEFINED
#define Sk4pxXfermode_DEFINED
#include "include/private/SkNx.h"
#include "src/core/Sk4px.h"
#include "src/core/SkMSAN.h"
#include "src/core/SkXfermodePriv.h"

View File

@ -5562,7 +5562,6 @@ generated_cc_atom(
":Test_hdr",
"//include/private:SkNx_hdr",
"//include/utils:SkRandom_hdr",
"//src/core:Sk4px_hdr",
],
)

View File

@ -7,7 +7,6 @@
#include "include/private/SkNx.h"
#include "include/utils/SkRandom.h"
#include "src/core/Sk4px.h"
#include "tests/Test.h"
template <int N>
@ -185,29 +184,6 @@ DEF_TEST(SkNi_mulHi, r) {
REPORTER_ASSERT(r, c[3] == q[3]);
}
DEF_TEST(Sk4px_muldiv255round, r) {
for (int a = 0; a < (1<<8); a++) {
for (int b = 0; b < (1<<8); b++) {
int exact = (a*b+127)/255;
// Duplicate a and b 16x each.
Sk4px av = Sk16b(a),
bv = Sk16b(b);
// This way should always be exactly correct.
int correct = (av * bv).div255()[0];
REPORTER_ASSERT(r, correct == exact);
// We're a bit more flexible on this method: correct for 0 or 255, otherwise off by <=1.
int fast = av.approxMulDiv255(bv)[0];
REPORTER_ASSERT(r, fast-exact >= -1 && fast-exact <= 1);
if (a == 0 || a == 255 || b == 0 || b == 255) {
REPORTER_ASSERT(r, fast == exact);
}
}
}
}
DEF_TEST(SkNx_abs, r) {
auto fs = Sk4f(0.0f, -0.0f, 2.0f, -4.0f).abs();
REPORTER_ASSERT(r, fs[0] == 0.0f);

View File

@ -304,7 +304,7 @@ DEF_TEST(SkVx_strided_loads, r) {
check_strided_loads<float>(r);
}
DEF_TEST(SkVM_ScaledDividerU32, r) {
DEF_TEST(SkVx_ScaledDividerU32, r) {
static constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max();
auto errorBounds = [&](uint32_t actual, uint32_t expected) {
@ -342,4 +342,16 @@ DEF_TEST(SkVM_ScaledDividerU32, r) {
test(512'927'377);
}
DEF_TEST(SkVx_saturated_add, r) {
for (int a = 0; a < (1<<8); a++) {
for (int b = 0; b < (1<<8); b++) {
int exact = a+b;
if (exact > 255) { exact = 255; }
if (exact < 0) { exact = 0; }
REPORTER_ASSERT(r, saturated_add(skvx::byte16(a), skvx::byte16(b))[0] == exact);
}
}
}
} // namespace skvx

View File

@ -48,7 +48,6 @@ ignore = re.compile('|'.join([
r'src/opts/.*_SSSE3\.h',
r'src/opts/.*_neon\.h',
r'src/opts/.*_sse\.h',
r'src/opts/Sk4px_.*\.h',
r'src/ports/.*',
r'src/utils/.*_win\.h',
r'src/utils/win/.*',
@ -113,4 +112,3 @@ def main(argv):
if __name__ == '__main__':
main(sys.argv)