Some small SkJumper refactoring.

No generated code changes.

Change-Id: I2d480b5391f8246a01118766a9522d528a87f75a
Reviewed-on: https://skia-review.googlesource.com/9129
Reviewed-by: Mike Klein <mtklein@chromium.org>
Commit-Queue: Mike Klein <mtklein@chromium.org>
2017-03-01 21:49:23 -05:00 · 2017-03-01 21:49:23 -05:00 · 9c10df3b60
commit 9c10df3b60
parent 580ffa0fb1
2 changed files with 22 additions and 46 deletions
--- a/src/jumper/SkJumper_stages.cpp
+++ b/src/jumper/SkJumper_stages.cpp
@@ -114,7 +114,7 @@ static Dst bit_cast(const Src& src) {

    #define WRAP(name) sk_##name##_vfp4

-#elif defined(__AVX2__) && defined(__FMA__) && defined(__F16C__)
+#elif defined(__AVX__)
    #include <immintrin.h>

    // These are __m256 and __m256i, but friendlier and strongly-typed.
@@ -124,7 +124,14 @@ static Dst bit_cast(const Src& src) {
    using U16 = uint16_t __attribute__((ext_vector_type(8)));
    using U8  = uint8_t  __attribute__((ext_vector_type(8)));

-    static F   mad(F f, F m, F a)  { return _mm256_fmadd_ps(f,m,a);}
+    static F mad(F f, F m, F a)  {
+    #if defined(__FMA__)
+        return _mm256_fmadd_ps(f,m,a);
+    #else
+        return f*m+a;
+    #endif
+    }
+
    static F   min(F a, F b)       { return _mm256_min_ps(a,b);    }
    static F   max(F a, F b)       { return _mm256_max_ps(a,b);    }
    static F   abs_(F v)           { return _mm256_and_ps(v, 0-v); }
@@ -134,57 +141,30 @@ static Dst bit_cast(const Src& src) {
    static U32 round(F v, F scale) { return _mm256_cvtps_epi32(v*scale); }

    static U16 pack(U32 v) {
-        __m128i lo = _mm256_extractf128_si256(v, 0),
-                hi = _mm256_extractf128_si256(v, 1);
-        return _mm_packus_epi32(lo, hi);
+        return _mm_packus_epi32(_mm256_extractf128_si256(v, 0),
+                                _mm256_extractf128_si256(v, 1));
    }
    static U8 pack(U16 v) {
-        __m128i r = _mm_packus_epi16(v,v);
-        return unaligned_load<U8>(&r);
-    }
-
-    static F if_then_else(I32 c, F t, F e) { return _mm256_blendv_ps(e,t,c); }
-
-    static F gather(const float* p, U32 ix) { return _mm256_i32gather_ps(p, ix, 4); }
-
-    #define WRAP(name) sk_##name##_hsw
-
-#elif defined(__AVX__)
-    #include <immintrin.h>
-
-    using F   = float    __attribute__((ext_vector_type(8)));
-    using I32 =  int32_t __attribute__((ext_vector_type(8)));
-    using U32 = uint32_t __attribute__((ext_vector_type(8)));
-    using U16 = uint16_t __attribute__((ext_vector_type(8)));
-    using U8  = uint8_t  __attribute__((ext_vector_type(8)));
-
-    static F   mad(F f, F m, F a)  { return f*m+a;                 }
-    static F   min(F a, F b)       { return _mm256_min_ps(a,b);    }
-    static F   max(F a, F b)       { return _mm256_max_ps(a,b);    }
-    static F   abs_(F v)           { return _mm256_and_ps(v, 0-v); }
-    static F   floor(F v, K*)      { return _mm256_floor_ps(v);    }
-    static F   rcp  (F v)          { return _mm256_rcp_ps  (v);    }
-    static F   rsqrt(F v)          { return _mm256_rsqrt_ps(v);    }
-    static U32 round(F v, F scale) { return _mm256_cvtps_epi32(v*scale); }
-
-    static U16 pack(U32 v) {
-        __m128i lo = _mm256_extractf128_si256(v, 0),
-                hi = _mm256_extractf128_si256(v, 1);
-        return _mm_packus_epi32(lo, hi);
-    }
-    static U8 pack(U16 v) {
-        __m128i r = _mm_packus_epi16(v,v);
+        auto r = _mm_packus_epi16(v,v);
        return unaligned_load<U8>(&r);
    }

    static F if_then_else(I32 c, F t, F e) { return _mm256_blendv_ps(e,t,c); }

    static F gather(const float* p, U32 ix) {
+    #if defined(__AVX2__)
+        return _mm256_i32gather_ps(p, ix, 4);
+    #else
        return { p[ix[0]], p[ix[1]], p[ix[2]], p[ix[3]],
                 p[ix[4]], p[ix[5]], p[ix[6]], p[ix[7]], };
+    #endif
    }

-    #define WRAP(name) sk_##name##_avx
+    #if defined(__AVX2__) && defined(__F16C__) && defined(__FMA__)
+        #define WRAP(name) sk_##name##_hsw
+    #else
+        #define WRAP(name) sk_##name##_avx
+    #endif

 #elif defined(__SSE2__)
    #include <immintrin.h>
@@ -221,11 +201,7 @@ static Dst bit_cast(const Src& src) {
    }

    static F if_then_else(I32 c, F t, F e) {
-    #if defined(__SSE4_1__)
-        return _mm_blendv_ps(e,t,c);
-    #else
        return _mm_or_ps(_mm_and_ps(c, t), _mm_andnot_ps(c, e));
-    #endif
    }

    static F floor(F v, K* k) {
--- a/src/jumper/build_stages.py
+++ b/src/jumper/build_stages.py
@@ -10,7 +10,7 @@ import subprocess
 import sys

 #clang = ['clang++']
-clang = ['clang-3.9', '-x', 'c++']
+clang = ['ccache', 'clang-3.9', '-x', 'c++']

 ndk = '/Users/mtklein/brew/opt/android-ndk/'
 objdump = 'gobjdump'