remove unnecessary hsw opts

These are copies of the ssse3 versions of these routines.
Instead of copying those routines, just use them.

Add inline to _portable methods to stifle warnings that
they are unused when compiling for HSW.

Change-Id: I655a844473bb3336dd2d1a80c1390259261b6243
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/294295
Reviewed-by: Shuangshuang Zhou <shuangshuang.zhou@intel.com>
Commit-Queue: Mike Klein <mtklein@google.com>
This commit is contained in:
Mike Klein 2020-06-04 11:15:10 -05:00 committed by Skia Commit-Bot
parent 7910a8f761
commit 88c6d72b42
2 changed files with 3 additions and 46 deletions

View File

@ -28,8 +28,6 @@ namespace SkOpts {
RGBA_to_BGRA = SK_OPTS_NS::RGBA_to_BGRA; RGBA_to_BGRA = SK_OPTS_NS::RGBA_to_BGRA;
RGBA_to_rgbA = SK_OPTS_NS::RGBA_to_rgbA; RGBA_to_rgbA = SK_OPTS_NS::RGBA_to_rgbA;
RGBA_to_bgrA = SK_OPTS_NS::RGBA_to_bgrA; RGBA_to_bgrA = SK_OPTS_NS::RGBA_to_bgrA;
RGB_to_RGB1 = SK_OPTS_NS::RGB_to_RGB1;
RGB_to_BGR1 = SK_OPTS_NS::RGB_to_BGR1;
gray_to_RGB1 = SK_OPTS_NS::gray_to_RGB1; gray_to_RGB1 = SK_OPTS_NS::gray_to_RGB1;
grayA_to_RGBA = SK_OPTS_NS::grayA_to_RGBA; grayA_to_RGBA = SK_OPTS_NS::grayA_to_RGBA;
grayA_to_rgbA = SK_OPTS_NS::grayA_to_rgbA; grayA_to_rgbA = SK_OPTS_NS::grayA_to_rgbA;

View File

@ -65,7 +65,7 @@ static void RGBA_to_BGRA_portable(uint32_t* dst, const uint32_t* src, int count)
} }
} }
static void RGB_to_RGB1_portable(uint32_t dst[], const uint8_t* src, int count) { static inline void RGB_to_RGB1_portable(uint32_t dst[], const uint8_t* src, int count) {
for (int i = 0; i < count; i++) { for (int i = 0; i < count; i++) {
uint8_t r = src[0], uint8_t r = src[0],
g = src[1], g = src[1],
@ -78,7 +78,7 @@ static void RGB_to_RGB1_portable(uint32_t dst[], const uint8_t* src, int count)
} }
} }
static void RGB_to_BGR1_portable(uint32_t dst[], const uint8_t* src, int count) { static inline void RGB_to_BGR1_portable(uint32_t dst[], const uint8_t* src, int count) {
for (int i = 0; i < count; i++) { for (int i = 0; i < count; i++) {
uint8_t r = src[0], uint8_t r = src[0],
g = src[1], g = src[1],
@ -580,48 +580,7 @@ static void premul_should_swapRB(bool kSwapRB, uint32_t* dst, const uint32_t* sr
RGBA_to_BGRA_portable(dst, src, count); RGBA_to_BGRA_portable(dst, src, count);
} }
// Use SSSE3 impl as AVX2 impl regresses performance on some platforms. // We saw no benefit from AVX2 over SSSE3 for RGB_to_RGB1 / RGB_to_BGR1.
// Expands `count` packed 3-byte pixels read from `src` into 4-byte pixels written
// to `dst`, forcing the most significant byte of every output pixel to 0xFF.
// When kSwapRB is true the first and third bytes of each pixel are exchanged on
// the way through; otherwise the byte order is kept.
// Four pixels are converted per iteration using 128-bit loads and stores, and
// whatever is left over is handed to the matching *_portable routine.
static void insert_alpha_should_swaprb(bool kSwapRB,
uint32_t dst[], const uint8_t* src, int count) {
// Per-32-bit-lane mask that sets the top byte of each output pixel.
const __m128i alphaMask = _mm_set1_epi32(0xFF000000);
__m128i expand;
const uint8_t X = 0xFF; // Used as a placeholder. The value of X is irrelevant
                        // because that byte is overwritten by alphaMask below.
if (kSwapRB) {
// Each group of four indices picks source bytes 2,1,0 of a pixel (reversed).
expand = _mm_setr_epi8(2,1,0,X, 5,4,3,X, 8,7,6,X, 11,10,9,X);
} else {
// Each group of four indices picks source bytes 0,1,2 of a pixel (in order).
expand = _mm_setr_epi8(0,1,2,X, 3,4,5,X, 6,7,8,X, 9,10,11,X);
}
// The 16-byte load below touches 5 pixels plus one extra byte, so at least
// 6 pixels (18 bytes) must remain for the load to stay inside the source.
while (count >= 6) {
// Load a vector. While this actually contains 5 pixels plus an
// extra component, we will discard all but the first four pixels on
// this iteration.
__m128i rgb = _mm_loadu_si128((const __m128i*) src);
// Expand the first four pixels to RGBX and then mask to RGB(FF).
__m128i rgba = _mm_or_si128(_mm_shuffle_epi8(rgb, expand), alphaMask);
// Store 4 pixels.
_mm_storeu_si128((__m128i*) dst, rgba);
// Advance past the four pixels just converted (12 source bytes, 4 outputs).
src += 4*3;
dst += 4;
count -= 4;
}
// Call portable code to finish up the tail of [0,6) pixels: the loop above
// stops as soon as fewer than 6 pixels remain, so up to 5 can be left over.
auto proc = kSwapRB ? RGB_to_BGR1_portable : RGB_to_RGB1_portable;
proc(dst, src, count);
}
// Converts `count` packed 3-byte pixels from `src` into 4-byte pixels in `dst`
// by calling insert_alpha_should_swaprb with kSwapRB = false (byte order kept).
/*not static*/ inline void RGB_to_RGB1(uint32_t dst[], const uint8_t* src, int count) {
insert_alpha_should_swaprb(false, dst, src, count);
}
// Converts `count` packed 3-byte pixels from `src` into 4-byte pixels in `dst`
// by calling insert_alpha_should_swaprb with kSwapRB = true (first and third
// bytes of each pixel exchanged).
/*not static*/ inline void RGB_to_BGR1(uint32_t dst[], const uint8_t* src, int count) {
insert_alpha_should_swaprb(true, dst, src, count);
}
/*not static*/ inline void gray_to_RGB1(uint32_t dst[], const uint8_t* src, int count) { /*not static*/ inline void gray_to_RGB1(uint32_t dst[], const uint8_t* src, int count) {
const __m256i alphas = _mm256_set1_epi8((uint8_t) 0xFF); const __m256i alphas = _mm256_set1_epi8((uint8_t) 0xFF);