NEON optimizations for gray -> RGBA (or BGRA) conversions
Swizzle Bench Runtime
  Nexus 6P 0.32x
  Nexus 9  0.89x

PNG Decode Time (for test set of gray encoded PNGs)
  Nexus 6P 0.88x
  Nexus 9  0.91x

BUG=skia:4767
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1656383002
CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot

Review URL: https://codereview.chromium.org/1656383002
This commit is contained in:
parent
c92159c825
commit
2eff71c9b5
@ -32,3 +32,4 @@ DEF_BENCH(return new SwizzleBench("SkOpts::RGBA_to_bgrA", SkOpts::RGBA_to_bgrA))
|
||||
DEF_BENCH(return new SwizzleBench("SkOpts::RGBA_to_BGRA", SkOpts::RGBA_to_BGRA));
|
||||
DEF_BENCH(return new SwizzleBench("SkOpts::RGB_to_RGB1", SkOpts::RGB_to_RGB1));
|
||||
DEF_BENCH(return new SwizzleBench("SkOpts::RGB_to_BGR1", SkOpts::RGB_to_BGR1));
|
||||
DEF_BENCH(return new SwizzleBench("SkOpts::gray_to_RGB1", SkOpts::gray_to_RGB1));
|
||||
|
@ -270,6 +270,19 @@ static void swizzle_gray_to_n32(
|
||||
}
|
||||
}
|
||||
|
||||
static void fast_swizzle_gray_to_n32(
|
||||
void* dst, const uint8_t* src, int width, int bpp, int deltaSrc, int offset,
|
||||
const SkPMColor ctable[]) {
|
||||
|
||||
// This function must not be called if we are sampling. If we are not
|
||||
// sampling, deltaSrc should equal bpp.
|
||||
SkASSERT(deltaSrc == bpp);
|
||||
|
||||
// Note that there is no need to distinguish between RGB and BGR.
|
||||
// Each color channel will get the same value.
|
||||
SkOpts::gray_to_RGB1((uint32_t*) dst, src + offset, width);
|
||||
}
|
||||
|
||||
static void swizzle_gray_to_565(
|
||||
void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
|
||||
int bytesPerPixel, int deltaSrc, int offset, const SkPMColor ctable[]) {
|
||||
@ -639,6 +652,7 @@ SkSwizzler* SkSwizzler::CreateSwizzler(SkSwizzler::SrcConfig sc,
|
||||
switch (dstInfo.colorType()) {
|
||||
case kN32_SkColorType:
|
||||
proc = &swizzle_gray_to_n32;
|
||||
fastProc = &fast_swizzle_gray_to_n32;
|
||||
break;
|
||||
case kGray_8_SkColorType:
|
||||
proc = &sample1;
|
||||
|
@ -84,6 +84,7 @@ namespace SkOpts {
|
||||
decltype(RGBA_to_bgrA) RGBA_to_bgrA = sk_default::RGBA_to_bgrA;
|
||||
decltype(RGB_to_RGB1) RGB_to_RGB1 = sk_default::RGB_to_RGB1;
|
||||
decltype(RGB_to_BGR1) RGB_to_BGR1 = sk_default::RGB_to_BGR1;
|
||||
decltype(gray_to_RGB1) gray_to_RGB1 = sk_default::gray_to_RGB1;
|
||||
|
||||
// Each Init_foo() is defined in src/opts/SkOpts_foo.cpp.
|
||||
void Init_ssse3();
|
||||
|
@ -61,7 +61,8 @@ namespace SkOpts {
|
||||
RGBA_to_rgbA, // i.e. just premultiply
|
||||
RGBA_to_bgrA, // i.e. swap RB and premultiply
|
||||
RGB_to_RGB1, // i.e. insert an opaque alpha
|
||||
RGB_to_BGR1; // i.e. swap RB and insert an opaque alpha
|
||||
RGB_to_BGR1, // i.e. swap RB and insert an opaque alpha
|
||||
gray_to_RGB1; // i.e. set color channels to same value + an opaque alpha
|
||||
}
|
||||
|
||||
#endif//SkOpts_DEFINED
|
||||
|
@ -52,5 +52,6 @@ namespace SkOpts {
|
||||
RGBA_to_bgrA = sk_neon::RGBA_to_bgrA;
|
||||
RGB_to_RGB1 = sk_neon::RGB_to_RGB1;
|
||||
RGB_to_BGR1 = sk_neon::RGB_to_BGR1;
|
||||
gray_to_RGB1 = sk_neon::gray_to_RGB1;
|
||||
}
|
||||
}
|
||||
|
@ -23,5 +23,6 @@ namespace SkOpts {
|
||||
RGBA_to_bgrA = sk_ssse3::RGBA_to_bgrA;
|
||||
RGB_to_RGB1 = sk_ssse3::RGB_to_RGB1;
|
||||
RGB_to_BGR1 = sk_ssse3::RGB_to_BGR1;
|
||||
gray_to_RGB1 = sk_ssse3::gray_to_RGB1;
|
||||
}
|
||||
}
|
||||
|
@ -88,6 +88,16 @@ static void RGB_to_BGR1_portable(uint32_t dst[], const void* vsrc, int count) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 *  Portable gray -> opaque 32-bit pixel conversion.
 *
 *  For each of the `count` 8-bit gray samples read from `vsrc`, writes one
 *  32-bit value to `dst` whose top byte is 0xFF and whose three lower bytes
 *  all equal the gray sample (i.e. 0xFFgggggg).  Because the three color
 *  bytes are identical, the result is the same for RGB and BGR orderings.
 *
 *  @param dst    Output array; must have room for `count` 32-bit values.
 *  @param vsrc   Input bytes, one gray sample per pixel.
 *  @param count  Number of pixels to convert; nothing is written if <= 0.
 */
static void gray_to_RGB1_portable(uint32_t dst[], const void* vsrc, int count) {
    const uint8_t* src = (const uint8_t*)vsrc;
    for (int i = 0; i < count; i++) {
        const uint32_t gray = src[i];
        dst[i] = ((uint32_t)0xFF << 24)   // opaque alpha
               | (gray << 16)
               | (gray <<  8)
               | (gray <<  0);
    }
}
|
||||
|
||||
#if defined(SK_ARM_HAS_NEON)
|
||||
|
||||
// Rounded divide by 255, (x + 127) / 255
|
||||
@ -260,6 +270,47 @@ static void RGB_to_BGR1(uint32_t dst[], const void* src, int count) {
|
||||
insert_alpha_should_swaprb<true>(dst, src, count);
|
||||
}
|
||||
|
||||
static void gray_to_RGB1(uint32_t dst[], const void* vsrc, int count) {
|
||||
const uint8_t* src = (const uint8_t*) vsrc;
|
||||
while (count >= 16) {
|
||||
// Load 16 pixels.
|
||||
uint8x16_t gray = vld1q_u8(src);
|
||||
|
||||
// Set each of the color channels.
|
||||
uint8x16x4_t rgba;
|
||||
rgba.val[0] = gray;
|
||||
rgba.val[1] = gray;
|
||||
rgba.val[2] = gray;
|
||||
rgba.val[3] = vdupq_n_u8(0xFF);
|
||||
|
||||
// Store 16 pixels.
|
||||
vst4q_u8((uint8_t*) dst, rgba);
|
||||
src += 16;
|
||||
dst += 16;
|
||||
count -= 16;
|
||||
}
|
||||
|
||||
if (count >= 8) {
|
||||
// Load 8 pixels.
|
||||
uint8x8_t gray = vld1_u8(src);
|
||||
|
||||
// Set each of the color channels.
|
||||
uint8x8x4_t rgba;
|
||||
rgba.val[0] = gray;
|
||||
rgba.val[1] = gray;
|
||||
rgba.val[2] = gray;
|
||||
rgba.val[3] = vdup_n_u8(0xFF);
|
||||
|
||||
// Store 8 pixels.
|
||||
vst4_u8((uint8_t*) dst, rgba);
|
||||
src += 8;
|
||||
dst += 8;
|
||||
count -= 8;
|
||||
}
|
||||
|
||||
gray_to_RGB1_portable(dst, src, count);
|
||||
}
|
||||
|
||||
#elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
|
||||
|
||||
template <bool kSwapRB>
|
||||
@ -401,6 +452,10 @@ static void RGB_to_BGR1(uint32_t dst[], const void* src, int count) {
|
||||
insert_alpha_should_swaprb<true>(dst, src, count);
|
||||
}
|
||||
|
||||
// Gray -> opaque 32-bit pixels for the SSSE3 build.  This branch has no
// vectorized version; it forwards straight to the portable loop.
static void gray_to_RGB1(uint32_t dst[], const void* src, int count) {
    gray_to_RGB1_portable(dst, src, count);
}
|
||||
|
||||
#else
|
||||
|
||||
static void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) {
|
||||
@ -423,6 +478,10 @@ static void RGB_to_BGR1(uint32_t dst[], const void* src, int count) {
|
||||
RGB_to_BGR1_portable(dst, src, count);
|
||||
}
|
||||
|
||||
// Gray -> opaque 32-bit pixels for builds with neither NEON nor SSSE3:
// forwards straight to the portable loop.
static void gray_to_RGB1(uint32_t dst[], const void* src, int count) {
    gray_to_RGB1_portable(dst, src, count);
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user