Optimize blend_transformed_tiled_argb().

Profiling indicates that this function is one of two hot spots causing noticeable latency when changing KDE virtual desktops. Instead of computing two modulo operations per pixel in the inner loop, the modulos are now computed once outside the loop, and the inner loop only advances the already-wrapped 16.16 fixed-point coordinates by a pre-reduced delta, wrapping them with a compare and an add/subtract, for a reasonable speedup. Change-Id: Ic4217b7686e031d7673b3e10aa977dae263096dc Reviewed-by: Gunnar Sletta <gunnar.sletta@jollamobile.com>
2013-07-29 13:40:08 +03:00 · 2013-07-29 13:40:08 +03:00 · 0e65cec6b4
commit 0e65cec6b4
parent 18c04d0ab6
1 changed files with 14 additions and 6 deletions
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@ -5116,13 +5116,13 @@ static void blend_transformed_tiled_argb(int count, const QSpan *spans, void *us
                int l = qMin(length, buffer_size);
                const uint *end = buffer + l;
                uint *b = buffer;
+                int px16 = x % (image_width << 16);
+                int py16 = y % (image_height << 16);
+                int px_delta = fdx % (image_width << 16);
+                int py_delta = fdy % (image_height << 16);
                while (b < end) {
-                    int px = x >> 16;
-                    int py = y >> 16;
-                    px %= image_width;
-                    py %= image_height;
-                    if (px < 0) px += image_width;
-                    if (py < 0) py += image_height;
+                    int px = px16 >> 16;
+                    int py = py16 >> 16;
                    int y_offset = py * scanline_offset;

                    Q_ASSERT(px >= 0 && px < image_width);
@ -5131,6 +5131,14 @@ static void blend_transformed_tiled_argb(int count, const QSpan *spans, void *us
                    *b = image_bits[y_offset + px];
                    x += fdx;
                    y += fdy;
+                    px16 += px_delta;
+                    if (px16 >= image_width << 16)
+                        px16 -= image_width << 16;
+                    py16 += py_delta;
+                    if (py16 >= image_height << 16)
+                        py16 -= image_height << 16;
+                    if (px16 < 0) px16 += image_width << 16;
+                    if (py16 < 0) py16 += image_height << 16;
                    ++b;
                }
                func(target, buffer, l, coverage);