make skvx::if_then_else work at byte granularity

The default implementation of if_then_else is logically bitwise: (cond & true_val) | (~cond & false_val). The existing skvx specializations work only for 32-bit lanes, but we can easily make them work for any type where the whole vector is the right size by reducing the granularity down to byte level. Existing code using 32-bit values and 0xffff'ffff or 0x0000'0000 masks will continue to work the same. But this now lets us use, e.g., 16-bit values with 0xffff and 0x0000 masks, or even things like 32-bit values and a mask like 0xff00ff00, selecting byte by byte. We can't go any lower without falling back on the generic bitwise implementation, so we'll have to settle for not getting to use a mask like 0x0f0f0f0f. Change-Id: I8518cb3cafc7f6e1480b4ae8af50daad2d28c5df Reviewed-on: https://skia-review.googlesource.com/c/skia/+/317170 Reviewed-by: Herb Derby <herb@google.com> Commit-Queue: Mike Klein <mtklein@google.com>
2020-09-15 15:26:22 -05:00 · 2020-09-15 15:26:22 -05:00 · c3ad6a1e59
commit c3ad6a1e59
parent 1cf303fa5a
1 changed files with 13 additions and 13 deletions
--- a/include/private/SkVx.h
+++ b/include/private/SkVx.h
@ -355,28 +355,28 @@ SINT Vec<N,T> if_then_else(const Vec<N,M<T>>& cond, const Vec<N,T>& t, const Vec
    // Specializations inline here so they can generalize what types the apply to.
    // (This header is used in C++14 contexts, so we have to kind of fake constexpr if.)
 #if defined(__AVX__)
-    if /*constexpr*/ (N == 8 && sizeof(T) == 4) {
-        return unchecked_bit_pun<Vec<N,T>>(_mm256_blendv_ps(unchecked_bit_pun<__m256>(e),
-                                                            unchecked_bit_pun<__m256>(t),
-                                                            unchecked_bit_pun<__m256>(cond)));
+    if /*constexpr*/ (N*sizeof(T) == 32) {
+        return unchecked_bit_pun<Vec<N,T>>(_mm256_blendv_epi8(unchecked_bit_pun<__m256i>(e),
+                                                              unchecked_bit_pun<__m256i>(t),
+                                                              unchecked_bit_pun<__m256i>(cond)));
    }
 #endif
 #if defined(__SSE4_1__)
-    if /*constexpr*/ (N == 4 && sizeof(T) == 4) {
-        return unchecked_bit_pun<Vec<N,T>>(_mm_blendv_ps(unchecked_bit_pun<__m128>(e),
-                                                         unchecked_bit_pun<__m128>(t),
-                                                         unchecked_bit_pun<__m128>(cond)));
+    if /*constexpr*/ (N*sizeof(T) == 16) {
+        return unchecked_bit_pun<Vec<N,T>>(_mm_blendv_epi8(unchecked_bit_pun<__m128i>(e),
+                                                           unchecked_bit_pun<__m128i>(t),
+                                                           unchecked_bit_pun<__m128i>(cond)));
    }
 #endif
 #if defined(__ARM_NEON)
-    if /*constexpr*/ (N == 4 && sizeof(T) == 4) {
-        return unchecked_bit_pun<Vec<N,T>>(vbslq_f32(unchecked_bit_pun< uint32x4_t>(cond),
-                                                     unchecked_bit_pun<float32x4_t>(t),
-                                                     unchecked_bit_pun<float32x4_t>(e)));
+    if /*constexpr*/ (N*sizeof(T) == 16) {
+        return unchecked_bit_pun<Vec<N,T>>(vbslq_u8(unchecked_bit_pun<uint8x16_t>(cond),
+                                                    unchecked_bit_pun<uint8x16_t>(t),
+                                                    unchecked_bit_pun<uint8x16_t>(e)));
    }
 #endif
    // Recurse for large vectors to try to hit the specializations above.
-    if /*constexpr*/ (N > 4) {
+    if /*constexpr*/ (N*sizeof(T) > 16) {
        return join(if_then_else(cond.lo, t.lo, e.lo),
                    if_then_else(cond.hi, t.hi, e.hi));
    }