Add SSSE3 acceleration for S32_D16_filter_DX
With this CL, the related nanobench results improve for the 565 config:
bitmap_BGRA_8888_update_scale_bilerp: 76.1us -> 46.7us (0.61x)
bitmap_BGRA_8888_scale_bilerp: 78.7us -> 47us (0.6x)
bitmap_BGRA_8888_update_volatile_scale_bilerp: 82.7us -> 46.9us (0.57x)
BUG=skia:
Review URL: https://codereview.chromium.org/788853002
This commit is contained in:
parent
67f2eb45b9
commit
72b0c05fc1
@ -732,6 +732,17 @@ void S32_alpha_D32_filter_DXDY_SSSE3(const SkBitmapProcState& s,
|
|||||||
S32_generic_D32_filter_DXDY_SSSE3<true>(s, xy, count, colors);
|
S32_generic_D32_filter_DXDY_SSSE3<true>(s, xy, count, colors);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SSSE3 entry point for the S32 -> D16 "filter DX" sample proc.
// Runs the generic 32-bit DX filter into a temporary 32-bit buffer, then
// converts each temporary pixel to 16 bits with SkPixel32ToPixel16 and writes
// it to `colors`.
//   s      - bitmap proc state forwarded unchanged to the generic filter.
//   xy     - coordinate/filter data forwarded unchanged to the generic filter.
//   count  - number of pixels to produce; asserted to be at most 254.
//   colors - destination for `count` 16-bit pixels.
void S32_D16_filter_DX_SSSE3(const SkBitmapProcState& s,
|
||||||
|
const uint32_t* xy,
|
||||||
|
int count, uint16_t* colors) {
|
||||||
|
// The limit matches the 254-entry size given to the temporary buffer below.
SkASSERT(254 >= count);
|
||||||
|
// Temporary 32-bit buffer sized by `count`.
// NOTE(review): presumably uses inline storage for up to 254 entries - confirm
// against SkAutoSTMalloc.
SkAutoSTMalloc<254, uint32_t> colors32(count);
|
||||||
|
// NOTE(review): the <false> template argument presumably selects the
// non-alpha variant (the S32_alpha_* wrappers pass <true>) - confirm.
S32_generic_D32_filter_DX_SSSE3<false>(s, xy, count, colors32);
|
||||||
|
for(int i = 0; i < count; i++) {
|
||||||
|
// Convert one 32-bit pixel to 16 bits and advance the output pointer.
*colors++ = SkPixel32ToPixel16(colors32[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void S32_D16_filter_DXDY_SSSE3(const SkBitmapProcState& s,
|
void S32_D16_filter_DXDY_SSSE3(const SkBitmapProcState& s,
|
||||||
const uint32_t* xy,
|
const uint32_t* xy,
|
||||||
int count, uint16_t* colors) {
|
int count, uint16_t* colors) {
|
||||||
@ -769,6 +780,12 @@ void S32_alpha_D32_filter_DXDY_SSSE3(const SkBitmapProcState& s,
|
|||||||
sk_throw();
|
sk_throw();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Stub with the same signature as the SSSE3 S32 -> D16 "filter DX" proc.
// It ignores all arguments and unconditionally calls sk_throw().
// NOTE(review): presumably this is the fallback compiled when SSSE3 support is
// unavailable at build time - the enclosing preprocessor guard is not visible
// here; confirm.
void S32_D16_filter_DX_SSSE3(const SkBitmapProcState& s,
|
||||||
|
const uint32_t* xy,
|
||||||
|
int count, uint16_t* colors) {
|
||||||
|
sk_throw();
|
||||||
|
}
|
||||||
|
|
||||||
void S32_D16_filter_DXDY_SSSE3(const SkBitmapProcState& s,
|
void S32_D16_filter_DXDY_SSSE3(const SkBitmapProcState& s,
|
||||||
const uint32_t* xy,
|
const uint32_t* xy,
|
||||||
int count, uint16_t* colors) {
|
int count, uint16_t* colors) {
|
||||||
|
@ -23,6 +23,9 @@ void S32_alpha_D32_filter_DXDY_SSSE3(const SkBitmapProcState& s,
|
|||||||
const uint32_t* xy,
|
const uint32_t* xy,
|
||||||
int count, uint32_t* colors);
|
int count, uint32_t* colors);
|
||||||
|
|
||||||
|
// Declaration of the SSSE3 S32 -> D16 "filter DX" sample proc; writes 16-bit
// pixels to `colors` (the sibling D32 declaration above takes uint32_t*).
void S32_D16_filter_DX_SSSE3(const SkBitmapProcState& s,
|
||||||
|
const uint32_t* xy,
|
||||||
|
int count, uint16_t* colors);
|
||||||
void S32_D16_filter_DXDY_SSSE3(const SkBitmapProcState& s,
|
void S32_D16_filter_DXDY_SSSE3(const SkBitmapProcState& s,
|
||||||
const uint32_t* xy,
|
const uint32_t* xy,
|
||||||
int count, uint16_t* colors);
|
int count, uint16_t* colors);
|
||||||
|
@ -176,7 +176,11 @@ void SkBitmapProcState::platformProcs() {
|
|||||||
|
|
||||||
/* Check fSampleProc16 */
|
/* Check fSampleProc16 */
|
||||||
if (fSampleProc16 == S32_D16_filter_DX) {
|
if (fSampleProc16 == S32_D16_filter_DX) {
|
||||||
fSampleProc16 = S32_D16_filter_DX_SSE2;
|
if (ssse3) {
|
||||||
|
fSampleProc16 = S32_D16_filter_DX_SSSE3;
|
||||||
|
} else {
|
||||||
|
fSampleProc16 = S32_D16_filter_DX_SSE2;
|
||||||
|
}
|
||||||
} else if (ssse3 && fSampleProc16 == S32_D16_filter_DXDY) {
|
} else if (ssse3 && fSampleProc16 == S32_D16_filter_DXDY) {
|
||||||
fSampleProc16 = S32_D16_filter_DXDY_SSSE3;
|
fSampleProc16 = S32_D16_filter_DXDY_SSSE3;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user