Reland "standardize macro checks in SkRasterPipeline_opts"
This is a reland of 75d25c4c8f
Last time I screwed up by replacing defined(__AVX2__) with
defined(JUMPER_IS_HSW) alone. When __AVX2__ is defined, we'll
be using AVX2 _or better_, so the check also needs to cover
JUMPER_IS_AVX512.
This time around I think I've got it all straight, translating
each old __FOO__ check into the full set of equivalent
JUMPER_IS_FOO checks.
Last time I also missed a defined(__SSE__); that check has now
been translated into JUMPER_IS_FOOs too.
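For context on why these unions look the way they do: the header defines exactly one JUMPER_IS_FOO tier per compile, while a raw compiler macro like __AVX2__ stays defined at every tier at or above AVX2, so a faithful translation is the union of all tiers where the old macro would have been true. Below is a minimal, hypothetical sketch of that kind of ladder and of the translated checks. It is my illustration only; the real ladder in SkRasterPipeline_opts.h differs in detail (it also handles NEON and ties the scalar fallback to non-Clang builds).

#include <cstdio>

// Hypothetical, simplified tier ladder: exactly one JUMPER_IS_FOO
// macro ends up defined for any given set of compiler flags.
#if defined(__AVX512F__)
    #define JUMPER_IS_AVX512
#elif defined(__AVX2__)
    #define JUMPER_IS_HSW
#elif defined(__AVX__)
    #define JUMPER_IS_AVX
#elif defined(__SSE4_1__)
    #define JUMPER_IS_SSE41
#elif defined(__SSE2__)
    #define JUMPER_IS_SSE2
#else
    #define JUMPER_IS_SCALAR
#endif

int main() {
    // defined(__AVX2__) is true at the AVX2 tier *and* every tier above,
    // so its faithful translation is the union of those tiers:
#if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
    std::puts("AVX2-or-better path");
    // Likewise, defined(__SSE__) covers every x86 tier from SSE up; the
    // tiers above AVX2 are already handled by the branch above:
#elif defined(JUMPER_IS_SSE2) || defined(JUMPER_IS_SSE41) || defined(JUMPER_IS_AVX)
    std::puts("SSE path");
#else
    std::puts("scalar path");
#endif
}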
Original change's description:
> standardize macro checks in SkRasterPipeline_opts
>
> We mostly check JUMPER_IS_FOO macros, but in a few places we are
> checking the raw __FOO__ compiler macros instead. If only for clarity,
> switch to JUMPER_IS_FOO.
>
> This is also a step towards me being able to control the instruction
> set we choose without having to #define these protected __FOO__ macros.
>
> Change-Id: Ieea2090ff658399e27746e0bb8ce950b06f9efb8
> Reviewed-on: https://skia-review.googlesource.com/150961
> Commit-Queue: Brian Osman <brianosman@google.com>
> Auto-Submit: Mike Klein <mtklein@google.com>
> Reviewed-by: Brian Osman <brianosman@google.com>
Change-Id: I656d89e3d3cd7fa23fd618c80e59908fd2b31329
Reviewed-on: https://skia-review.googlesource.com/150965
Reviewed-by: Brian Osman <brianosman@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
Commit 83e86ebf3c (parent 545aa936f5)
@@ -2191,7 +2191,7 @@ namespace lowp {
 
 #else  // We are compiling vector code with Clang... let's make some lowp stages!
 
-#if defined(__AVX2__)
+#if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
     using U8  = uint8_t  __attribute__((ext_vector_type(16)));
     using U16 = uint16_t __attribute__((ext_vector_type(16)));
     using I16 =  int16_t __attribute__((ext_vector_type(16)));
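As an aside on why this guard is AVX2-or-better: sixteen 16-bit lanes occupy exactly one 256-bit ymm register. A quick Clang-only check (my illustration, not code from the patch):

// Clang vector extension, as used throughout this file.
using U16 = uint16_t __attribute__((ext_vector_type(16)));
static_assert(sizeof(U16) == 32, "16 lanes x 16 bits == one 256-bit register");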
@@ -2417,11 +2417,11 @@ SI F mad(F f, F m, F a) { return f*m+a; }
 SI U32 trunc_(F x) { return (U32)cast<I32>(x); }
 
 SI F rcp(F x) {
-#if defined(__AVX2__)
+#if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
     __m256 lo,hi;
     split(x, &lo,&hi);
     return join<F>(_mm256_rcp_ps(lo), _mm256_rcp_ps(hi));
-#elif defined(__SSE__)
+#elif defined(JUMPER_IS_SSE2) || defined(JUMPER_IS_SSE41) || defined(JUMPER_IS_AVX)
     __m128 lo,hi;
     split(x, &lo,&hi);
     return join<F>(_mm_rcp_ps(lo), _mm_rcp_ps(hi));
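The rcp()/sqrt() bodies here lean on split() and join<F>() to run one wide vector as two native halves. The diff only shows their call sites; a hedged sketch of what such helpers can look like (Skia's actual definitions may differ):

#include <cstring>

// Reinterpret a wide vector V as two narrower halves T, and back.
template <typename T, typename V>
static inline void split(V v, T* lo, T* hi) {
    static_assert(2*sizeof(T) == sizeof(V), "V must be exactly two Ts wide");
    std::memcpy(lo, (const char*)&v,             sizeof(T));
    std::memcpy(hi, (const char*)&v + sizeof(T), sizeof(T));
}

template <typename V, typename T>
static inline V join(T lo, T hi) {
    static_assert(2*sizeof(T) == sizeof(V), "V must be exactly two Ts wide");
    V v;
    std::memcpy((char*)&v,             &lo, sizeof(T));
    std::memcpy((char*)&v + sizeof(T), &hi, sizeof(T));
    return v;
}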
@@ -2438,11 +2438,11 @@ SI F rcp(F x) {
 #endif
 }
 SI F sqrt_(F x) {
-#if defined(__AVX2__)
+#if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
     __m256 lo,hi;
     split(x, &lo,&hi);
     return join<F>(_mm256_sqrt_ps(lo), _mm256_sqrt_ps(hi));
-#elif defined(__SSE__)
+#elif defined(JUMPER_IS_SSE2) || defined(JUMPER_IS_SSE41) || defined(JUMPER_IS_AVX)
     __m128 lo,hi;
     split(x, &lo,&hi);
     return join<F>(_mm_sqrt_ps(lo), _mm_sqrt_ps(hi));
@@ -2473,11 +2473,11 @@ SI F floor_(F x) {
     float32x4_t lo,hi;
     split(x, &lo,&hi);
     return join<F>(vrndmq_f32(lo), vrndmq_f32(hi));
-#elif defined(__AVX2__)
+#elif defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
     __m256 lo,hi;
     split(x, &lo,&hi);
     return join<F>(_mm256_floor_ps(lo), _mm256_floor_ps(hi));
-#elif defined(__SSE4_1__)
+#elif defined(JUMPER_IS_SSE41) || defined(JUMPER_IS_AVX)
     __m128 lo,hi;
     split(x, &lo,&hi);
     return join<F>(_mm_floor_ps(lo), _mm_floor_ps(hi));
@@ -2666,7 +2666,7 @@ SI V load(const T* ptr, size_t tail) {
     V v = 0;
     switch (tail & (N-1)) {
         case 0: memcpy(&v, ptr, sizeof(v)); break;
-#if defined(__AVX2__)
+#if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
         case 15: v[14] = ptr[14];
         case 14: v[13] = ptr[13];
         case 13: v[12] = ptr[12];
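The tail-handling switch here is a fall-through ladder: tail is the number of live lanes left (0 means a full register), and each case copies one lane then falls into the next. The guard changes because only the AVX2-or-better configuration runs 16 lanes, so cases 15 down to 9 exist only there. A runnable 4-lane miniature of the same pattern (my sketch, not the patch's code):

#include <cstdio>
#include <cstring>

using F4 = float __attribute__((ext_vector_type(4)));  // Clang vector type
constexpr size_t N = 4;

static F4 load_tail(const float* ptr, size_t tail) {
    F4 v = 0;
    switch (tail & (N-1)) {
        case 0: std::memcpy(&v, ptr, sizeof(v)); break;  // full load
        case 3: v[2] = ptr[2]; [[fallthrough]];          // copy lane 2...
        case 2: v[1] = ptr[1]; [[fallthrough]];          // ...then lane 1...
        case 1: v[0] = ptr[0];                           // ...then lane 0.
    }
    return v;
}

int main() {
    float data[3] = {1,2,3};
    F4 v = load_tail(data, 3);  // only 3 lanes are valid
    std::printf("%g %g %g %g\n", v[0], v[1], v[2], v[3]);  // 1 2 3 0
}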
@@ -2690,7 +2690,7 @@ template <typename V, typename T>
 SI void store(T* ptr, size_t tail, V v) {
     switch (tail & (N-1)) {
         case 0: memcpy(ptr, &v, sizeof(v)); break;
-#if defined(__AVX2__)
+#if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
         case 15: ptr[14] = v[14];
         case 14: ptr[13] = v[13];
         case 13: ptr[12] = v[12];
@@ -2710,7 +2710,7 @@ SI void store(T* ptr, size_t tail, V v) {
     }
 }
 
-#if defined(__AVX2__)
+#if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
 template <typename V, typename T>
 SI V gather(const T* ptr, U32 ix) {
     return V{ ptr[ix[ 0]], ptr[ix[ 1]], ptr[ix[ 2]], ptr[ix[ 3]],
@@ -2748,7 +2748,7 @@ SI void store(T* ptr, size_t tail, V v) {
 // ~~~~~~ 32-bit memory loads and stores ~~~~~~ //
 
 SI void from_8888(U32 rgba, U16* r, U16* g, U16* b, U16* a) {
-#if 1 && defined(__AVX2__)
+#if 1 && defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
     // Swap the middle 128-bit lanes to make _mm256_packus_epi32() in cast_U16() work out nicely.
     __m256i _01,_23;
     split(rgba, &_01, &_23);
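One subtlety worth flagging in this hunk: in the preprocessor, && binds tighter than ||, so the new condition parses as shown below. It still selects the intended branches, but the behavior of the "1" debugging toggle changes:

// Equivalent parse of the condition above:
#if (1 && defined(JUMPER_IS_HSW)) || defined(JUMPER_IS_AVX512)
// Flipping the 1 to 0 now disables only the HSW half of the check;
// the AVX512 half would still enable this branch.
#endif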
@@ -3084,7 +3084,7 @@ SI void gradient_lookup(const SkJumper_GradientCtx* c, U32 idx, F t,
                         U16* r, U16* g, U16* b, U16* a) {
 
     F fr, fg, fb, fa, br, bg, bb, ba;
-#if defined(__AVX2__)
+#if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
     if (c->stopCount <=8) {
         __m256i lo, hi;
         split(idx, &lo, &hi);
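For this final hunk, the reason the fast path wants AVX2-or-better: with at most 8 gradient stops, each channel's stop table fits in one 256-bit register, so an in-register permute can stand in for a memory gather. A hedged sketch of that idea using a real AVX2 intrinsic (illustrative only, not the verbatim Skia code; requires compiling with AVX2 enabled, e.g. -mavx2):

#include <immintrin.h>

// Look up 8 gradient-stop values at once: the table fits one ymm register,
// and _mm256_permutevar8x32_ps() (an AVX2 instruction) selects lanes by index.
static __m256 lookup8(const float stops[8], __m256i idx) {
    return _mm256_permutevar8x32_ps(_mm256_loadu_ps(stops), idx);
}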