Add SSSE3 acceleration for S32_D16_filter_DX

With this CL, the related nanobench results improve for the 565 config:
         bitmap_BGRA_8888_update_scale_bilerp   76.1us -> 46.7us        0.61x
                bitmap_BGRA_8888_scale_bilerp   78.7us ->   47us        0.6x
bitmap_BGRA_8888_update_volatile_scale_bilerp   82.7us -> 46.9us        0.57x

BUG=skia:

Review URL: https://codereview.chromium.org/788853002
This commit is contained in:
qiankun.miao 2014-12-10 07:21:35 -08:00 committed by Commit bot
parent 67f2eb45b9
commit 72b0c05fc1
3 changed files with 25 additions and 1 deletions

View File

@ -732,6 +732,17 @@ void S32_alpha_D32_filter_DXDY_SSSE3(const SkBitmapProcState& s,
S32_generic_D32_filter_DXDY_SSSE3<true>(s, xy, count, colors);
}
void S32_D16_filter_DX_SSSE3(const SkBitmapProcState& s,
const uint32_t* xy,
int count, uint16_t* colors) {
SkASSERT(254 >= count);
SkAutoSTMalloc<254, uint32_t> colors32(count);
S32_generic_D32_filter_DX_SSSE3<false>(s, xy, count, colors32);
for(int i = 0; i < count; i++) {
*colors++ = SkPixel32ToPixel16(colors32[i]);
}
}
void S32_D16_filter_DXDY_SSSE3(const SkBitmapProcState& s,
const uint32_t* xy,
int count, uint16_t* colors) {
@ -769,6 +780,12 @@ void S32_alpha_D32_filter_DXDY_SSSE3(const SkBitmapProcState& s,
sk_throw();
}
// Placeholder definition of S32_D16_filter_DX_SSSE3: ignores every argument
// and unconditionally calls sk_throw().
// NOTE(review): presumably compiled only when the real SSSE3 implementation
// is unavailable -- confirm against the surrounding preprocessor guards.
void S32_D16_filter_DX_SSSE3(const SkBitmapProcState& s,
                             const uint32_t* xy,
                             int count, uint16_t* colors) {
    sk_throw();
}
void S32_D16_filter_DXDY_SSSE3(const SkBitmapProcState& s,
const uint32_t* xy,
int count, uint16_t* colors) {

View File

@ -23,6 +23,9 @@ void S32_alpha_D32_filter_DXDY_SSSE3(const SkBitmapProcState& s,
const uint32_t* xy,
int count, uint32_t* colors);
// SSSE3 version of the S32 -> D16 filter_DX sample proc: takes the proc
// state `s` and coordinate data `xy`, and writes `count` 16-bit pixels
// into `colors`.
void S32_D16_filter_DX_SSSE3(const SkBitmapProcState& s,
const uint32_t* xy,
int count, uint16_t* colors);
void S32_D16_filter_DXDY_SSSE3(const SkBitmapProcState& s,
const uint32_t* xy,
int count, uint16_t* colors);

View File

@ -176,7 +176,11 @@ void SkBitmapProcState::platformProcs() {
/* Check fSampleProc16 */
if (fSampleProc16 == S32_D16_filter_DX) {
fSampleProc16 = S32_D16_filter_DX_SSE2;
if (ssse3) {
fSampleProc16 = S32_D16_filter_DX_SSSE3;
} else {
fSampleProc16 = S32_D16_filter_DX_SSE2;
}
} else if (ssse3 && fSampleProc16 == S32_D16_filter_DXDY) {
fSampleProc16 = S32_D16_filter_DXDY_SSSE3;
}