make skvx::if_then_else work at byte granularity

The default implementation of if_then_else is logically bitwise,

   (cond & true_val) | (~cond & false_val)

The existing skvx specializations work only for 32-bit lanes, but we can
easily make them work for any type where the whole vector is the right
size by reducing the granularity down to byte level.

Existing code using 32-bit values and 0xffff'ffff or 0x0000'0000 masks
will continue to work the same.  But this now lets us use, e.g. 16-bit
values with 0xffff and 0x0000 masks, or even things like 32-bit values
and a mask like 0xff00ff00, selecting byte by byte.

We can't go any lower without falling back on the generic bitwise
implementation, so we'll have to settle for not getting to use a mask
like 0x0f0f0f0f.

Change-Id: I8518cb3cafc7f6e1480b4ae8af50daad2d28c5df
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/317170
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
This commit is contained in:
Mike Klein 2020-09-15 15:26:22 -05:00 committed by Skia Commit-Bot
parent 1cf303fa5a
commit c3ad6a1e59

View File

@ -355,28 +355,28 @@ SINT Vec<N,T> if_then_else(const Vec<N,M<T>>& cond, const Vec<N,T>& t, const Vec
// Specializations inline here so they can generalize what types they apply to.
// (This header is used in C++14 contexts, so we have to kind of fake constexpr if.)
#if defined(__AVX__)
if /*constexpr*/ (N == 8 && sizeof(T) == 4) {
return unchecked_bit_pun<Vec<N,T>>(_mm256_blendv_ps(unchecked_bit_pun<__m256>(e),
unchecked_bit_pun<__m256>(t),
unchecked_bit_pun<__m256>(cond)));
if /*constexpr*/ (N*sizeof(T) == 32) {
return unchecked_bit_pun<Vec<N,T>>(_mm256_blendv_epi8(unchecked_bit_pun<__m256i>(e),
unchecked_bit_pun<__m256i>(t),
unchecked_bit_pun<__m256i>(cond)));
}
#endif
#if defined(__SSE4_1__)
if /*constexpr*/ (N == 4 && sizeof(T) == 4) {
return unchecked_bit_pun<Vec<N,T>>(_mm_blendv_ps(unchecked_bit_pun<__m128>(e),
unchecked_bit_pun<__m128>(t),
unchecked_bit_pun<__m128>(cond)));
if /*constexpr*/ (N*sizeof(T) == 16) {
return unchecked_bit_pun<Vec<N,T>>(_mm_blendv_epi8(unchecked_bit_pun<__m128i>(e),
unchecked_bit_pun<__m128i>(t),
unchecked_bit_pun<__m128i>(cond)));
}
#endif
#if defined(__ARM_NEON)
if /*constexpr*/ (N == 4 && sizeof(T) == 4) {
return unchecked_bit_pun<Vec<N,T>>(vbslq_f32(unchecked_bit_pun< uint32x4_t>(cond),
unchecked_bit_pun<float32x4_t>(t),
unchecked_bit_pun<float32x4_t>(e)));
if /*constexpr*/ (N*sizeof(T) == 16) {
return unchecked_bit_pun<Vec<N,T>>(vbslq_u8(unchecked_bit_pun<uint8x16_t>(cond),
unchecked_bit_pun<uint8x16_t>(t),
unchecked_bit_pun<uint8x16_t>(e)));
}
#endif
// Recurse for large vectors to try to hit the specializations above.
if /*constexpr*/ (N > 4) {
if /*constexpr*/ (N*sizeof(T) > 16) {
return join(if_then_else(cond.lo, t.lo, e.lo),
if_then_else(cond.hi, t.hi, e.hi));
}