Reland "standardize macro checks in SkRasterPipeline_opts"

This is a reland of 75d25c4c8f

Last time I screwed up by replacing defined(__AVX2__) with just
defined(JUMPER_IS_HSW).  When __AVX2__ is defined we'll be using
AVX2 _or better_, so the check needs to cover JUMPER_IS_AVX512 too.

This time around I think I've got it all straight
translating the old __FOO__ into the equivalent JUMPER_IS_FOO.

Last time I also missed a defined(__SSE__), which is now translated
into the corresponding JUMPER_IS_FOO checks too.
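
For reference, the instruction-set chooser that defines these macros looks
roughly like the sketch below.  This is an illustrative sketch only, not the
exact block in SkRasterPipeline_opts.h (the real chooser also covers NEON and
the scalar fallback); the point is that each raw __FOO__ macro means "FOO or
better", so one raw check maps onto a set of JUMPER_IS_FOO checks.

    // Sketch of the chooser (assumed shape, not verbatim Skia code).
    #if defined(__AVX512F__)
        #define JUMPER_IS_AVX512
    #elif defined(__AVX2__)
        #define JUMPER_IS_HSW        // Haswell: AVX2 (plus FMA, F16C)
    #elif defined(__AVX__)
        #define JUMPER_IS_AVX
    #elif defined(__SSE4_1__)
        #define JUMPER_IS_SSE41
    #elif defined(__SSE2__)
        #define JUMPER_IS_SSE2
    #else
        #define JUMPER_IS_SCALAR
    #endif

    // So the old raw checks translate as:
    //   defined(__AVX2__)   -> defined(JUMPER_IS_HSW)   || defined(JUMPER_IS_AVX512)
    //   defined(__SSE4_1__) -> defined(JUMPER_IS_SSE41) || defined(JUMPER_IS_AVX)
    //   defined(__SSE__)    -> defined(JUMPER_IS_SSE2)  || defined(JUMPER_IS_SSE41) || defined(JUMPER_IS_AVX)
    // (the HSW/AVX512 tiers are already taken by an earlier #elif in those spots).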

Original change's description:
> standardize macro checks in SkRasterPipeline_opts
>
> We mostly check JUMPER_IS_FOO macros, but in a few places we are
> checking the raw __FOO__ compiler macros instead.  If only for clarity,
> switch to JUMPER_IS_FOO.
>
> This is also a step towards me being able to control the instruction
> set we choose without having to #define these protected __FOO__ macros.
>
> Change-Id: Ieea2090ff658399e27746e0bb8ce950b06f9efb8
> Reviewed-on: https://skia-review.googlesource.com/150961
> Commit-Queue: Brian Osman <brianosman@google.com>
> Auto-Submit: Mike Klein <mtklein@google.com>
> Reviewed-by: Brian Osman <brianosman@google.com>
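
On that last point: once every check in the file keys off JUMPER_IS_FOO, the
chooser above is the single place that decides the instruction set, so a build
define can cap it without touching the compiler-reserved __FOO__ macros.  A
hypothetical example (SK_PIPELINE_LIMIT_SSE41 is an invented name, not an
actual Skia define):

    // Hypothetical cap: force the SSE4.1 pipeline even when the compile
    // target enables AVX2.  Everything downstream follows automatically
    // because it only ever checks JUMPER_IS_FOO.
    #if defined(SK_PIPELINE_LIMIT_SSE41) && defined(__SSE4_1__)
        #define JUMPER_IS_SSE41
    #elif defined(__AVX2__)
        #define JUMPER_IS_HSW
    #elif defined(__SSE4_1__)
        #define JUMPER_IS_SSE41
    #elif defined(__SSE2__)
        #define JUMPER_IS_SSE2
    #else
        #define JUMPER_IS_SCALAR
    #endif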

Change-Id: I656d89e3d3cd7fa23fd618c80e59908fd2b31329
Reviewed-on: https://skia-review.googlesource.com/150965
Reviewed-by: Brian Osman <brianosman@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
Author: Mike Klein, 2018-08-31 10:19:21 -04:00 (committed by Skia Commit-Bot)
Parent: 545aa936f5
Commit: 83e86ebf3c

@@ -2191,7 +2191,7 @@ namespace lowp {
 #else // We are compiling vector code with Clang... let's make some lowp stages!
-#if defined(__AVX2__)
+#if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
 using U8 = uint8_t __attribute__((ext_vector_type(16)));
 using U16 = uint16_t __attribute__((ext_vector_type(16)));
 using I16 = int16_t __attribute__((ext_vector_type(16)));
@@ -2417,11 +2417,11 @@ SI F mad(F f, F m, F a) { return f*m+a; }
 SI U32 trunc_(F x) { return (U32)cast<I32>(x); }
 SI F rcp(F x) {
-#if defined(__AVX2__)
+#if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
 __m256 lo,hi;
 split(x, &lo,&hi);
 return join<F>(_mm256_rcp_ps(lo), _mm256_rcp_ps(hi));
-#elif defined(__SSE__)
+#elif defined(JUMPER_IS_SSE2) || defined(JUMPER_IS_SSE41) || defined(JUMPER_IS_AVX)
 __m128 lo,hi;
 split(x, &lo,&hi);
 return join<F>(_mm_rcp_ps(lo), _mm_rcp_ps(hi));
@@ -2438,11 +2438,11 @@ SI F rcp(F x) {
 #endif
 }
 SI F sqrt_(F x) {
-#if defined(__AVX2__)
+#if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
 __m256 lo,hi;
 split(x, &lo,&hi);
 return join<F>(_mm256_sqrt_ps(lo), _mm256_sqrt_ps(hi));
-#elif defined(__SSE__)
+#elif defined(JUMPER_IS_SSE2) || defined(JUMPER_IS_SSE41) || defined(JUMPER_IS_AVX)
 __m128 lo,hi;
 split(x, &lo,&hi);
 return join<F>(_mm_sqrt_ps(lo), _mm_sqrt_ps(hi));
@@ -2473,11 +2473,11 @@ SI F floor_(F x) {
 float32x4_t lo,hi;
 split(x, &lo,&hi);
 return join<F>(vrndmq_f32(lo), vrndmq_f32(hi));
-#elif defined(__AVX2__)
+#elif defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
 __m256 lo,hi;
 split(x, &lo,&hi);
 return join<F>(_mm256_floor_ps(lo), _mm256_floor_ps(hi));
-#elif defined(__SSE4_1__)
+#elif defined(JUMPER_IS_SSE41) || defined(JUMPER_IS_AVX)
 __m128 lo,hi;
 split(x, &lo,&hi);
 return join<F>(_mm_floor_ps(lo), _mm_floor_ps(hi));
@@ -2666,7 +2666,7 @@ SI V load(const T* ptr, size_t tail) {
 V v = 0;
 switch (tail & (N-1)) {
 case 0: memcpy(&v, ptr, sizeof(v)); break;
-#if defined(__AVX2__)
+#if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
 case 15: v[14] = ptr[14];
 case 14: v[13] = ptr[13];
 case 13: v[12] = ptr[12];
@@ -2690,7 +2690,7 @@ template <typename V, typename T>
 SI void store(T* ptr, size_t tail, V v) {
 switch (tail & (N-1)) {
 case 0: memcpy(ptr, &v, sizeof(v)); break;
-#if defined(__AVX2__)
+#if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
 case 15: ptr[14] = v[14];
 case 14: ptr[13] = v[13];
 case 13: ptr[12] = v[12];
@@ -2710,7 +2710,7 @@ SI void store(T* ptr, size_t tail, V v) {
 }
 }
-#if defined(__AVX2__)
+#if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
 template <typename V, typename T>
 SI V gather(const T* ptr, U32 ix) {
 return V{ ptr[ix[ 0]], ptr[ix[ 1]], ptr[ix[ 2]], ptr[ix[ 3]],
@@ -2748,7 +2748,7 @@ SI void store(T* ptr, size_t tail, V v) {
 // ~~~~~~ 32-bit memory loads and stores ~~~~~~ //
 SI void from_8888(U32 rgba, U16* r, U16* g, U16* b, U16* a) {
-#if 1 && defined(__AVX2__)
+#if 1 && defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
 // Swap the middle 128-bit lanes to make _mm256_packus_epi32() in cast_U16() work out nicely.
 __m256i _01,_23;
 split(rgba, &_01, &_23);
@@ -3084,7 +3084,7 @@ SI void gradient_lookup(const SkJumper_GradientCtx* c, U32 idx, F t,
 U16* r, U16* g, U16* b, U16* a) {
 F fr, fg, fb, fa, br, bg, bb, ba;
-#if defined(__AVX2__)
+#if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
 if (c->stopCount <=8) {
 __m256i lo, hi;
 split(idx, &lo, &hi);