forked from AuroraMiddleware/gtk
gdk: Specialise RGBA8 → premultiplied BGRA8 conversion
On x86 on a Kaby Lake CPU, this makes it go from 6.63% of the total execution time (loading some PNGs using the cairo backend) down to 3.20%. On ARM on a Cortex-A7, on the same workload, this makes it go from 57% to 8.36%.
This commit is contained in:
parent
82e4690564
commit
0e3ed7a738
@ -166,6 +166,29 @@ r32g32b32a32_float_from_float (guchar *dest,
|
||||
memcpy (dest, src, sizeof (float) * n * 4);
|
||||
}
|
||||
|
||||
// This one conversion is quite important, it converts from RGBA with straight
|
||||
// alpha (as found in PNG for instance) to BGRA with premultiplied alpha (the
|
||||
// sole cairo format available).
|
||||
static void
|
||||
r8g8b8a8_to_b8g8r8a8_premultiplied (guchar *dest,
|
||||
const guchar *src,
|
||||
gsize n)
|
||||
{
|
||||
for (; n > 0; n--)
|
||||
{
|
||||
guchar a = src[3];
|
||||
guint16 r = (guint16)src[0] * a + 127;
|
||||
guint16 g = (guint16)src[1] * a + 127;
|
||||
guint16 b = (guint16)src[2] * a + 127;
|
||||
dest[0] = (b + (b >> 8) + 1) >> 8;
|
||||
dest[1] = (g + (g >> 8) + 1) >> 8;
|
||||
dest[2] = (r + (r >> 8) + 1) >> 8;
|
||||
dest[3] = a;
|
||||
dest += 4;
|
||||
src += 4;
|
||||
}
|
||||
}
|
||||
|
||||
struct _GdkMemoryFormatDescription
|
||||
{
|
||||
GdkMemoryAlpha alpha;
|
||||
@ -479,6 +502,17 @@ gdk_memory_convert (guchar *dest_data,
|
||||
g_assert (dest_format < GDK_MEMORY_N_FORMATS);
|
||||
g_assert (src_format < GDK_MEMORY_N_FORMATS);
|
||||
|
||||
if (src_format == GDK_MEMORY_R8G8B8A8 && dest_format == GDK_MEMORY_B8G8R8A8_PREMULTIPLIED)
|
||||
{
|
||||
for (y = 0; y < height; y++)
|
||||
{
|
||||
r8g8b8a8_to_b8g8r8a8_premultiplied (dest_data, src_data, width);
|
||||
src_data += src_stride;
|
||||
dest_data += dest_stride;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
tmp = g_new (float, width * 4);
|
||||
|
||||
for (y = 0; y < height; y++)
|
||||
|
Loading…
Reference in New Issue
Block a user