Reland "standardize macro checks in SkRasterPipeline_opts"
This is a reland of 75d25c4c8f
Last time I screwed up by replacing defined(__AVX2__) with
defined(JUMPER_IS_HSW) alone. When __AVX2__ is defined, we'll
be using AVX2 _or better_, so the check also needs to cover
JUMPER_IS_AVX512.
This time around I think I've got it all straight, translating
each old __FOO__ check into the full set of equivalent
JUMPER_IS_FOO checks.
Last time I also missed a defined(__SSE__); that check has now
been translated into JUMPER_IS_FOOs too.
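For context on why these unions look the way they do: the header defines exactly one JUMPER_IS_FOO tier per compile, while a raw compiler macro like __AVX2__ stays defined at every tier at or above AVX2, so a faithful translation is the union of all tiers where the old macro would have been true. Below is a minimal, hypothetical sketch of that kind of ladder and of the translated checks. It is my illustration only; the real ladder in SkRasterPipeline_opts.h differs in detail (it also handles NEON and ties the scalar fallback to non-Clang builds).

#include <cstdio>

// Hypothetical, simplified tier ladder: exactly one JUMPER_IS_FOO
// macro ends up defined for any given set of compiler flags.
#if defined(__AVX512F__)
    #define JUMPER_IS_AVX512
#elif defined(__AVX2__)
    #define JUMPER_IS_HSW
#elif defined(__AVX__)
    #define JUMPER_IS_AVX
#elif defined(__SSE4_1__)
    #define JUMPER_IS_SSE41
#elif defined(__SSE2__)
    #define JUMPER_IS_SSE2
#else
    #define JUMPER_IS_SCALAR
#endif

int main() {
    // defined(__AVX2__) is true at the AVX2 tier *and* every tier above,
    // so its faithful translation is the union of those tiers:
#if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
    std::puts("AVX2-or-better path");
    // Likewise, defined(__SSE__) covers every x86 tier from SSE up; the
    // tiers above AVX2 are already handled by the branch above:
#elif defined(JUMPER_IS_SSE2) || defined(JUMPER_IS_SSE41) || defined(JUMPER_IS_AVX)
    std::puts("SSE path");
#else
    std::puts("scalar path");
#endif
}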
Original change's description:
> standardize macro checks in SkRasterPipeline_opts
>
> We mostly check JUMPER_IS_FOO macros, but in a few places we are
> checking the raw __FOO__ compiler macros instead. If only for clarity,
> switch to JUMPER_IS_FOO.
>
> This is also a step towards me being able to control the instruction
> set we choose without having to #define these protected __FOO__ macros.
>
> Change-Id: Ieea2090ff658399e27746e0bb8ce950b06f9efb8
> Reviewed-on: https://skia-review.googlesource.com/150961
> Commit-Queue: Brian Osman <brianosman@google.com>
> Auto-Submit: Mike Klein <mtklein@google.com>
> Reviewed-by: Brian Osman <brianosman@google.com>
Change-Id: I656d89e3d3cd7fa23fd618c80e59908fd2b31329
Reviewed-on: https://skia-review.googlesource.com/150965
Reviewed-by: Brian Osman <brianosman@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
Commit 83e86ebf3c (parent 545aa936f5)
@@ -2191,7 +2191,7 @@ namespace lowp {
 
 #else  // We are compiling vector code with Clang... let's make some lowp stages!
 
-#if defined(__AVX2__)
+#if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
     using U8  = uint8_t  __attribute__((ext_vector_type(16)));
     using U16 = uint16_t __attribute__((ext_vector_type(16)));
     using I16 =  int16_t __attribute__((ext_vector_type(16)));
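As an aside on why this guard is AVX2-or-better: sixteen 16-bit lanes occupy exactly one 256-bit ymm register. A quick Clang-only check (my illustration, not code from the patch):

// Clang vector extension, as used throughout this file.
using U16 = uint16_t __attribute__((ext_vector_type(16)));
static_assert(sizeof(U16) == 32, "16 lanes x 16 bits == one 256-bit register");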
@@ -2417,11 +2417,11 @@ SI F mad(F f, F m, F a) { return f*m+a; }
 SI U32 trunc_(F x) { return (U32)cast<I32>(x); }
 
 SI F rcp(F x) {
-#if defined(__AVX2__)
+#if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
     __m256 lo,hi;
     split(x, &lo,&hi);
     return join<F>(_mm256_rcp_ps(lo), _mm256_rcp_ps(hi));
-#elif defined(__SSE__)
+#elif defined(JUMPER_IS_SSE2) || defined(JUMPER_IS_SSE41) || defined(JUMPER_IS_AVX)
     __m128 lo,hi;
     split(x, &lo,&hi);
     return join<F>(_mm_rcp_ps(lo), _mm_rcp_ps(hi));
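The rcp()/sqrt() bodies here lean on split() and join<F>() to run one wide vector as two native halves. The diff only shows their call sites; a hedged sketch of what such helpers can look like (Skia's actual definitions may differ):

#include <cstring>

// Reinterpret a wide vector V as two narrower halves T, and back.
template <typename T, typename V>
static inline void split(V v, T* lo, T* hi) {
    static_assert(2*sizeof(T) == sizeof(V), "V must be exactly two Ts wide");
    std::memcpy(lo, (const char*)&v,             sizeof(T));
    std::memcpy(hi, (const char*)&v + sizeof(T), sizeof(T));
}

template <typename V, typename T>
static inline V join(T lo, T hi) {
    static_assert(2*sizeof(T) == sizeof(V), "V must be exactly two Ts wide");
    V v;
    std::memcpy((char*)&v,             &lo, sizeof(T));
    std::memcpy((char*)&v + sizeof(T), &hi, sizeof(T));
    return v;
}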
@@ -2438,11 +2438,11 @@ SI F rcp(F x) {
 #endif
 }
 SI F sqrt_(F x) {
-#if defined(__AVX2__)
+#if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
     __m256 lo,hi;
     split(x, &lo,&hi);
     return join<F>(_mm256_sqrt_ps(lo), _mm256_sqrt_ps(hi));
-#elif defined(__SSE__)
+#elif defined(JUMPER_IS_SSE2) || defined(JUMPER_IS_SSE41) || defined(JUMPER_IS_AVX)
     __m128 lo,hi;
     split(x, &lo,&hi);
     return join<F>(_mm_sqrt_ps(lo), _mm_sqrt_ps(hi));
@@ -2473,11 +2473,11 @@ SI F floor_(F x) {
     float32x4_t lo,hi;
     split(x, &lo,&hi);
     return join<F>(vrndmq_f32(lo), vrndmq_f32(hi));
-#elif defined(__AVX2__)
+#elif defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
     __m256 lo,hi;
     split(x, &lo,&hi);
     return join<F>(_mm256_floor_ps(lo), _mm256_floor_ps(hi));
-#elif defined(__SSE4_1__)
+#elif defined(JUMPER_IS_SSE41) || defined(JUMPER_IS_AVX)
     __m128 lo,hi;
     split(x, &lo,&hi);
     return join<F>(_mm_floor_ps(lo), _mm_floor_ps(hi));
@@ -2666,7 +2666,7 @@ SI V load(const T* ptr, size_t tail) {
     V v = 0;
     switch (tail & (N-1)) {
         case 0: memcpy(&v, ptr, sizeof(v)); break;
-#if defined(__AVX2__)
+#if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
         case 15: v[14] = ptr[14];
         case 14: v[13] = ptr[13];
         case 13: v[12] = ptr[12];
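The tail-handling switch here is a fall-through ladder: tail is the number of live lanes left (0 means a full register), and each case copies one lane then falls into the next. The guard changes because only the AVX2-or-better configuration runs 16 lanes, so cases 15 down to 9 exist only there. A runnable 4-lane miniature of the same pattern (my sketch, not the patch's code):

#include <cstdio>
#include <cstring>

using F4 = float __attribute__((ext_vector_type(4)));  // Clang vector type
constexpr size_t N = 4;

static F4 load_tail(const float* ptr, size_t tail) {
    F4 v = 0;
    switch (tail & (N-1)) {
        case 0: std::memcpy(&v, ptr, sizeof(v)); break;  // full load
        case 3: v[2] = ptr[2]; [[fallthrough]];          // copy lane 2...
        case 2: v[1] = ptr[1]; [[fallthrough]];          // ...then lane 1...
        case 1: v[0] = ptr[0];                           // ...then lane 0.
    }
    return v;
}

int main() {
    float data[3] = {1,2,3};
    F4 v = load_tail(data, 3);  // only 3 lanes are valid
    std::printf("%g %g %g %g\n", v[0], v[1], v[2], v[3]);  // 1 2 3 0
}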
@@ -2690,7 +2690,7 @@ template <typename V, typename T>
 SI void store(T* ptr, size_t tail, V v) {
     switch (tail & (N-1)) {
         case 0: memcpy(ptr, &v, sizeof(v)); break;
-#if defined(__AVX2__)
+#if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
         case 15: ptr[14] = v[14];
         case 14: ptr[13] = v[13];
         case 13: ptr[12] = v[12];
@@ -2710,7 +2710,7 @@ SI void store(T* ptr, size_t tail, V v) {
     }
 }
 
-#if defined(__AVX2__)
+#if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
 template <typename V, typename T>
 SI V gather(const T* ptr, U32 ix) {
     return V{ ptr[ix[ 0]], ptr[ix[ 1]], ptr[ix[ 2]], ptr[ix[ 3]],
@@ -2748,7 +2748,7 @@ SI void store(T* ptr, size_t tail, V v) {
 // ~~~~~~ 32-bit memory loads and stores ~~~~~~ //
 
 SI void from_8888(U32 rgba, U16* r, U16* g, U16* b, U16* a) {
-#if 1 && defined(__AVX2__)
+#if 1 && defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
     // Swap the middle 128-bit lanes to make _mm256_packus_epi32() in cast_U16() work out nicely.
     __m256i _01,_23;
     split(rgba, &_01, &_23);
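One subtlety worth flagging in this hunk: in the preprocessor, && binds tighter than ||, so the new condition parses as shown below. It still selects the intended branches, but the behavior of the "1" debugging toggle changes:

// Equivalent parse of the condition above:
#if (1 && defined(JUMPER_IS_HSW)) || defined(JUMPER_IS_AVX512)
// Flipping the 1 to 0 now disables only the HSW half of the check;
// the AVX512 half would still enable this branch.
#endif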
@@ -3084,7 +3084,7 @@ SI void gradient_lookup(const SkJumper_GradientCtx* c, U32 idx, F t,
                         U16* r, U16* g, U16* b, U16* a) {
 
     F fr, fg, fb, fa, br, bg, bb, ba;
-#if defined(__AVX2__)
+#if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
     if (c->stopCount <=8) {
         __m256i lo, hi;
         split(idx, &lo, &hi);
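For this final hunk, the reason the fast path wants AVX2-or-better: with at most 8 gradient stops, each channel's stop table fits in one 256-bit register, so an in-register permute can stand in for a memory gather. A hedged sketch of that idea using a real AVX2 intrinsic (illustrative only, not the verbatim Skia code; requires compiling with AVX2 enabled, e.g. -mavx2):

#include <immintrin.h>

// Look up 8 gradient-stop values at once: the table fits one ymm register,
// and _mm256_permutevar8x32_ps() (an AVX2 instruction) selects lanes by index.
static __m256 lookup8(const float stops[8], __m256i idx) {
    return _mm256_permutevar8x32_ps(_mm256_loadu_ps(stops), idx);
}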