Optimize CMYK->RGBA (BGRA) transform for jpeg decodes
Swizzle Bench Runtime Nexus 6P 0.14x Dell Venue 8 0.12x CMYK Jpeg Decode Runtime Nexus 6P 0.81x Dell Venue 8 0.85x BUG=skia: GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1676773003 CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot Review URL: https://codereview.chromium.org/1676773003
This commit is contained in:
parent
3125565804
commit
c5c322d8ec
@ -35,3 +35,5 @@ DEF_BENCH(return new SwizzleBench("SkOpts::RGB_to_BGR1", SkOpts::RGB_to_BGR1));
|
||||
DEF_BENCH(return new SwizzleBench("SkOpts::gray_to_RGB1", SkOpts::gray_to_RGB1));
|
||||
DEF_BENCH(return new SwizzleBench("SkOpts::grayA_to_RGBA", SkOpts::grayA_to_RGBA));
|
||||
DEF_BENCH(return new SwizzleBench("SkOpts::grayA_to_rgbA", SkOpts::grayA_to_rgbA));
|
||||
DEF_BENCH(return new SwizzleBench("SkOpts::inverted_CMYK_to_RGB1", SkOpts::inverted_CMYK_to_RGB1));
|
||||
DEF_BENCH(return new SwizzleBench("SkOpts::inverted_CMYK_to_BGR1", SkOpts::inverted_CMYK_to_BGR1));
|
||||
|
@ -592,6 +592,21 @@ static void swizzle_cmyk_to_n32(
|
||||
}
|
||||
}
|
||||
|
||||
static void fast_swizzle_cmyk_to_n32(
|
||||
void* dst, const uint8_t* src, int width, int bpp, int deltaSrc, int offset,
|
||||
const SkPMColor ctable[]) {
|
||||
|
||||
// This function must not be called if we are sampling. If we are not
|
||||
// sampling, deltaSrc should equal bpp.
|
||||
SkASSERT(deltaSrc == bpp);
|
||||
|
||||
#ifdef SK_PMCOLOR_IS_RGBA
|
||||
SkOpts::inverted_CMYK_to_RGB1((uint32_t*) dst, src + offset, width);
|
||||
#else
|
||||
SkOpts::inverted_CMYK_to_BGR1((uint32_t*) dst, src + offset, width);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void swizzle_cmyk_to_565(
|
||||
void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
|
||||
int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) {
|
||||
@ -811,6 +826,7 @@ SkSwizzler* SkSwizzler::CreateSwizzler(SkSwizzler::SrcConfig sc,
|
||||
break;
|
||||
case kRGB_565_SkColorType:
|
||||
proc = &swizzle_rgb_to_565;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -844,6 +860,7 @@ SkSwizzler* SkSwizzler::CreateSwizzler(SkSwizzler::SrcConfig sc,
|
||||
switch (dstInfo.colorType()) {
|
||||
case kN32_SkColorType:
|
||||
proc = &swizzle_cmyk_to_n32;
|
||||
fastProc = &fast_swizzle_cmyk_to_n32;
|
||||
break;
|
||||
case kRGB_565_SkColorType:
|
||||
proc = &swizzle_cmyk_to_565;
|
||||
|
@ -79,14 +79,16 @@ namespace SkOpts {
|
||||
decltype(matrix_scale_translate) matrix_scale_translate = sk_default::matrix_scale_translate;
|
||||
decltype(matrix_affine) matrix_affine = sk_default::matrix_affine;
|
||||
|
||||
decltype(RGBA_to_BGRA) RGBA_to_BGRA = sk_default::RGBA_to_BGRA;
|
||||
decltype(RGBA_to_rgbA) RGBA_to_rgbA = sk_default::RGBA_to_rgbA;
|
||||
decltype(RGBA_to_bgrA) RGBA_to_bgrA = sk_default::RGBA_to_bgrA;
|
||||
decltype(RGB_to_RGB1) RGB_to_RGB1 = sk_default::RGB_to_RGB1;
|
||||
decltype(RGB_to_BGR1) RGB_to_BGR1 = sk_default::RGB_to_BGR1;
|
||||
decltype(gray_to_RGB1) gray_to_RGB1 = sk_default::gray_to_RGB1;
|
||||
decltype(grayA_to_RGBA) grayA_to_RGBA = sk_default::grayA_to_RGBA;
|
||||
decltype(grayA_to_rgbA) grayA_to_rgbA = sk_default::grayA_to_rgbA;
|
||||
decltype(RGBA_to_BGRA) RGBA_to_BGRA = sk_default::RGBA_to_BGRA;
|
||||
decltype(RGBA_to_rgbA) RGBA_to_rgbA = sk_default::RGBA_to_rgbA;
|
||||
decltype(RGBA_to_bgrA) RGBA_to_bgrA = sk_default::RGBA_to_bgrA;
|
||||
decltype(RGB_to_RGB1) RGB_to_RGB1 = sk_default::RGB_to_RGB1;
|
||||
decltype(RGB_to_BGR1) RGB_to_BGR1 = sk_default::RGB_to_BGR1;
|
||||
decltype(gray_to_RGB1) gray_to_RGB1 = sk_default::gray_to_RGB1;
|
||||
decltype(grayA_to_RGBA) grayA_to_RGBA = sk_default::grayA_to_RGBA;
|
||||
decltype(grayA_to_rgbA) grayA_to_rgbA = sk_default::grayA_to_rgbA;
|
||||
decltype(inverted_CMYK_to_RGB1) inverted_CMYK_to_RGB1 = sk_default::inverted_CMYK_to_RGB1;
|
||||
decltype(inverted_CMYK_to_BGR1) inverted_CMYK_to_BGR1 = sk_default::inverted_CMYK_to_BGR1;
|
||||
|
||||
// Each Init_foo() is defined in src/opts/SkOpts_foo.cpp.
|
||||
void Init_ssse3();
|
||||
|
@ -57,14 +57,16 @@ namespace SkOpts {
|
||||
|
||||
// Swizzle input into some sort of 8888 pixel, {premul,unpremul} x {rgba,bgra}.
|
||||
typedef void (*Swizzle_8888)(uint32_t*, const void*, int);
|
||||
extern Swizzle_8888 RGBA_to_BGRA, // i.e. just swap RB
|
||||
RGBA_to_rgbA, // i.e. just premultiply
|
||||
RGBA_to_bgrA, // i.e. swap RB and premultiply
|
||||
RGB_to_RGB1, // i.e. insert an opaque alpha
|
||||
RGB_to_BGR1, // i.e. swap RB and insert an opaque alpha
|
||||
gray_to_RGB1, // i.e. expand to color channels + an opaque alpha
|
||||
grayA_to_RGBA, // i.e. expand to color channels
|
||||
grayA_to_rgbA; // i.e. expand to color channels and premultiply
|
||||
extern Swizzle_8888 RGBA_to_BGRA, // i.e. just swap RB
|
||||
RGBA_to_rgbA, // i.e. just premultiply
|
||||
RGBA_to_bgrA, // i.e. swap RB and premultiply
|
||||
RGB_to_RGB1, // i.e. insert an opaque alpha
|
||||
RGB_to_BGR1, // i.e. swap RB and insert an opaque alpha
|
||||
gray_to_RGB1, // i.e. expand to color channels + an opaque alpha
|
||||
grayA_to_RGBA, // i.e. expand to color channels
|
||||
grayA_to_rgbA, // i.e. expand to color channels and premultiply
|
||||
inverted_CMYK_to_RGB1, // i.e. convert color space
|
||||
inverted_CMYK_to_BGR1; // i.e. convert color space
|
||||
}
|
||||
|
||||
#endif//SkOpts_DEFINED
|
||||
|
@ -47,13 +47,15 @@ namespace SkOpts {
|
||||
matrix_scale_translate = sk_neon::matrix_scale_translate;
|
||||
matrix_affine = sk_neon::matrix_affine;
|
||||
|
||||
RGBA_to_BGRA = sk_neon::RGBA_to_BGRA;
|
||||
RGBA_to_rgbA = sk_neon::RGBA_to_rgbA;
|
||||
RGBA_to_bgrA = sk_neon::RGBA_to_bgrA;
|
||||
RGB_to_RGB1 = sk_neon::RGB_to_RGB1;
|
||||
RGB_to_BGR1 = sk_neon::RGB_to_BGR1;
|
||||
gray_to_RGB1 = sk_neon::gray_to_RGB1;
|
||||
grayA_to_RGBA = sk_neon::grayA_to_RGBA;
|
||||
grayA_to_rgbA = sk_neon::grayA_to_rgbA;
|
||||
RGBA_to_BGRA = sk_neon::RGBA_to_BGRA;
|
||||
RGBA_to_rgbA = sk_neon::RGBA_to_rgbA;
|
||||
RGBA_to_bgrA = sk_neon::RGBA_to_bgrA;
|
||||
RGB_to_RGB1 = sk_neon::RGB_to_RGB1;
|
||||
RGB_to_BGR1 = sk_neon::RGB_to_BGR1;
|
||||
gray_to_RGB1 = sk_neon::gray_to_RGB1;
|
||||
grayA_to_RGBA = sk_neon::grayA_to_RGBA;
|
||||
grayA_to_rgbA = sk_neon::grayA_to_rgbA;
|
||||
inverted_CMYK_to_RGB1 = sk_neon::inverted_CMYK_to_RGB1;
|
||||
inverted_CMYK_to_BGR1 = sk_neon::inverted_CMYK_to_BGR1;
|
||||
}
|
||||
}
|
||||
|
@ -18,13 +18,15 @@ namespace SkOpts {
|
||||
blit_mask_d32_a8 = sk_ssse3::blit_mask_d32_a8;
|
||||
color_cube_filter_span = sk_ssse3::color_cube_filter_span;
|
||||
|
||||
RGBA_to_BGRA = sk_ssse3::RGBA_to_BGRA;
|
||||
RGBA_to_rgbA = sk_ssse3::RGBA_to_rgbA;
|
||||
RGBA_to_bgrA = sk_ssse3::RGBA_to_bgrA;
|
||||
RGB_to_RGB1 = sk_ssse3::RGB_to_RGB1;
|
||||
RGB_to_BGR1 = sk_ssse3::RGB_to_BGR1;
|
||||
gray_to_RGB1 = sk_ssse3::gray_to_RGB1;
|
||||
grayA_to_RGBA = sk_ssse3::grayA_to_RGBA;
|
||||
grayA_to_rgbA = sk_ssse3::grayA_to_rgbA;
|
||||
RGBA_to_BGRA = sk_ssse3::RGBA_to_BGRA;
|
||||
RGBA_to_rgbA = sk_ssse3::RGBA_to_rgbA;
|
||||
RGBA_to_bgrA = sk_ssse3::RGBA_to_bgrA;
|
||||
RGB_to_RGB1 = sk_ssse3::RGB_to_RGB1;
|
||||
RGB_to_BGR1 = sk_ssse3::RGB_to_BGR1;
|
||||
gray_to_RGB1 = sk_ssse3::gray_to_RGB1;
|
||||
grayA_to_RGBA = sk_ssse3::grayA_to_RGBA;
|
||||
grayA_to_rgbA = sk_ssse3::grayA_to_rgbA;
|
||||
inverted_CMYK_to_RGB1 = sk_ssse3::inverted_CMYK_to_RGB1;
|
||||
inverted_CMYK_to_BGR1 = sk_ssse3::inverted_CMYK_to_BGR1;
|
||||
}
|
||||
}
|
||||
|
@ -125,6 +125,41 @@ static void grayA_to_rgbA_portable(uint32_t dst[], const void* vsrc, int count)
|
||||
}
|
||||
}
|
||||
|
||||
static void inverted_CMYK_to_RGB1_portable(uint32_t* dst, const void* vsrc, int count) {
|
||||
const uint32_t* src = (const uint32_t*)vsrc;
|
||||
for (int i = 0; i < count; i++) {
|
||||
uint8_t k = src[i] >> 24,
|
||||
y = src[i] >> 16,
|
||||
m = src[i] >> 8,
|
||||
c = src[i] >> 0;
|
||||
// See comments in SkSwizzler.cpp for details on the conversion formula.
|
||||
uint8_t b = (y*k+127)/255,
|
||||
g = (m*k+127)/255,
|
||||
r = (c*k+127)/255;
|
||||
dst[i] = (uint32_t)0xFF << 24
|
||||
| (uint32_t) b << 16
|
||||
| (uint32_t) g << 8
|
||||
| (uint32_t) r << 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void inverted_CMYK_to_BGR1_portable(uint32_t* dst, const void* vsrc, int count) {
|
||||
const uint32_t* src = (const uint32_t*)vsrc;
|
||||
for (int i = 0; i < count; i++) {
|
||||
uint8_t k = src[i] >> 24,
|
||||
y = src[i] >> 16,
|
||||
m = src[i] >> 8,
|
||||
c = src[i] >> 0;
|
||||
uint8_t b = (y*k+127)/255,
|
||||
g = (m*k+127)/255,
|
||||
r = (c*k+127)/255;
|
||||
dst[i] = (uint32_t)0xFF << 24
|
||||
| (uint32_t) r << 16
|
||||
| (uint32_t) g << 8
|
||||
| (uint32_t) b << 0;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(SK_ARM_HAS_NEON)
|
||||
|
||||
// Rounded divide by 255, (x + 127) / 255
|
||||
@ -401,6 +436,54 @@ static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) {
|
||||
expand_grayA<true>(dst, src, count);
|
||||
}
|
||||
|
||||
enum Format { kRGB1, kBGR1 };
|
||||
template <Format format>
|
||||
static void inverted_cmyk_to(uint32_t* dst, const void* vsrc, int count) {
|
||||
auto src = (const uint32_t*)vsrc;
|
||||
while (count >= 8) {
|
||||
// Load 8 cmyk pixels.
|
||||
uint8x8x4_t pixels = vld4_u8((const uint8_t*) src);
|
||||
|
||||
uint8x8_t k = pixels.val[3],
|
||||
y = pixels.val[2],
|
||||
m = pixels.val[1],
|
||||
c = pixels.val[0];
|
||||
|
||||
// Scale to r, g, b.
|
||||
uint8x8_t b = scale(y, k);
|
||||
uint8x8_t g = scale(m, k);
|
||||
uint8x8_t r = scale(c, k);
|
||||
|
||||
// Store 8 rgba pixels.
|
||||
if (kBGR1 == format) {
|
||||
pixels.val[3] = vdup_n_u8(0xFF);
|
||||
pixels.val[2] = r;
|
||||
pixels.val[1] = g;
|
||||
pixels.val[0] = b;
|
||||
} else {
|
||||
pixels.val[3] = vdup_n_u8(0xFF);
|
||||
pixels.val[2] = b;
|
||||
pixels.val[1] = g;
|
||||
pixels.val[0] = r;
|
||||
}
|
||||
vst4_u8((uint8_t*) dst, pixels);
|
||||
src += 8;
|
||||
dst += 8;
|
||||
count -= 8;
|
||||
}
|
||||
|
||||
auto proc = (kBGR1 == format) ? inverted_CMYK_to_BGR1_portable : inverted_CMYK_to_RGB1_portable;
|
||||
proc(dst, src, count);
|
||||
}
|
||||
|
||||
static void inverted_CMYK_to_RGB1(uint32_t dst[], const void* src, int count) {
|
||||
inverted_cmyk_to<kRGB1>(dst, src, count);
|
||||
}
|
||||
|
||||
static void inverted_CMYK_to_BGR1(uint32_t dst[], const void* src, int count) {
|
||||
inverted_cmyk_to<kBGR1>(dst, src, count);
|
||||
}
|
||||
|
||||
#elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
|
||||
|
||||
// Scale a byte by another.
|
||||
@ -631,6 +714,83 @@ static void grayA_to_rgbA(uint32_t dst[], const void* vsrc, int count) {
|
||||
grayA_to_rgbA_portable(dst, src, count);
|
||||
}
|
||||
|
||||
enum Format { kRGB1, kBGR1 };
|
||||
template <Format format>
|
||||
static void inverted_cmyk_to(uint32_t* dst, const void* vsrc, int count) {
|
||||
auto src = (const uint32_t*)vsrc;
|
||||
|
||||
auto convert8 = [](__m128i* lo, __m128i* hi) {
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
__m128i planar;
|
||||
if (kBGR1 == format) {
|
||||
planar = _mm_setr_epi8(2,6,10,14, 1,5,9,13, 0,4,8,12, 3,7,11,15);
|
||||
} else {
|
||||
planar = _mm_setr_epi8(0,4,8,12, 1,5,9,13, 2,6,10,14, 3,7,11,15);
|
||||
}
|
||||
|
||||
// Swizzle the pixels to 8-bit planar.
|
||||
*lo = _mm_shuffle_epi8(*lo, planar); // ccccmmmm yyyykkkk
|
||||
*hi = _mm_shuffle_epi8(*hi, planar); // CCCCMMMM YYYYKKKK
|
||||
__m128i cm = _mm_unpacklo_epi32(*lo, *hi), // ccccCCCC mmmmMMMM
|
||||
yk = _mm_unpackhi_epi32(*lo, *hi); // yyyyYYYY kkkkKKKK
|
||||
|
||||
// Unpack to 16-bit planar.
|
||||
__m128i c = _mm_unpacklo_epi8(cm, zeros), // c_c_c_c_ C_C_C_C_
|
||||
m = _mm_unpackhi_epi8(cm, zeros), // m_m_m_m_ M_M_M_M_
|
||||
y = _mm_unpacklo_epi8(yk, zeros), // y_y_y_y_ Y_Y_Y_Y_
|
||||
k = _mm_unpackhi_epi8(yk, zeros); // k_k_k_k_ K_K_K_K_
|
||||
|
||||
// Scale to r, g, b.
|
||||
__m128i r = scale(c, k),
|
||||
g = scale(m, k),
|
||||
b = scale(y, k);
|
||||
|
||||
// Repack into interlaced pixels.
|
||||
__m128i rg = _mm_or_si128(r, _mm_slli_epi16(g, 8)), // rgrgrgrg RGRGRGRG
|
||||
ba = _mm_or_si128(b, _mm_set1_epi16((uint16_t) 0xFF00)); // b1b1b1b1 B1B1B1B1
|
||||
*lo = _mm_unpacklo_epi16(rg, ba); // rgbargba rgbargba
|
||||
*hi = _mm_unpackhi_epi16(rg, ba); // RGB1RGB1 RGB1RGB1
|
||||
};
|
||||
|
||||
while (count >= 8) {
|
||||
__m128i lo = _mm_loadu_si128((const __m128i*) (src + 0)),
|
||||
hi = _mm_loadu_si128((const __m128i*) (src + 4));
|
||||
|
||||
convert8(&lo, &hi);
|
||||
|
||||
_mm_storeu_si128((__m128i*) (dst + 0), lo);
|
||||
_mm_storeu_si128((__m128i*) (dst + 4), hi);
|
||||
|
||||
src += 8;
|
||||
dst += 8;
|
||||
count -= 8;
|
||||
}
|
||||
|
||||
if (count >= 4) {
|
||||
__m128i lo = _mm_loadu_si128((const __m128i*) src),
|
||||
hi = _mm_setzero_si128();
|
||||
|
||||
convert8(&lo, &hi);
|
||||
|
||||
_mm_storeu_si128((__m128i*) dst, lo);
|
||||
|
||||
src += 4;
|
||||
dst += 4;
|
||||
count -= 4;
|
||||
}
|
||||
|
||||
auto proc = (kBGR1 == format) ? inverted_CMYK_to_BGR1_portable : inverted_CMYK_to_RGB1_portable;
|
||||
proc(dst, src, count);
|
||||
}
|
||||
|
||||
static void inverted_CMYK_to_RGB1(uint32_t dst[], const void* src, int count) {
|
||||
inverted_cmyk_to<kRGB1>(dst, src, count);
|
||||
}
|
||||
|
||||
static void inverted_CMYK_to_BGR1(uint32_t dst[], const void* src, int count) {
|
||||
inverted_cmyk_to<kBGR1>(dst, src, count);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) {
|
||||
@ -665,6 +825,14 @@ static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) {
|
||||
grayA_to_rgbA_portable(dst, src, count);
|
||||
}
|
||||
|
||||
static void inverted_CMYK_to_RGB1(uint32_t dst[], const void* src, int count) {
|
||||
inverted_CMYK_to_RGB1_portable(dst, src, count);
|
||||
}
|
||||
|
||||
static void inverted_CMYK_to_BGR1(uint32_t dst[], const void* src, int count) {
|
||||
inverted_CMYK_to_BGR1_portable(dst, src, count);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user