gtk2/gtk/gtkcairoblur.c
Alexander Larsson d0dc1f52cc gtkcairoblur: Unroll inner loop for common radius values
This unrolls the inner blur loop for radius 1-10, allowing
the compiler to use a divide-by-constant operation instead
of a generic division.

Here is the blur-performance output before:

Radius  1: 124.95 msec, 32.01 kpixels/msec:
Radius  2: 117.27 msec, 34.11 kpixels/msec:
Radius  3: 123.57 msec, 32.37 kpixels/msec:
Radius  4: 118.17 msec, 33.85 kpixels/msec:
Radius  5: 119.32 msec, 33.52 kpixels/msec:
Radius  6: 124.17 msec, 32.21 kpixels/msec:
Radius  7: 121.04 msec, 33.05 kpixels/msec:
Radius  8: 130.64 msec, 30.62 kpixels/msec:
Radius  9: 119.47 msec, 33.48 kpixels/msec:
Radius 10: 117.95 msec, 33.91 kpixels/msec:
Radius 11: 122.38 msec, 32.68 kpixels/msec:
Radius 12: 121.92 msec, 32.81 kpixels/msec:
Radius 13: 125.45 msec, 31.89 kpixels/msec:
Radius 14: 121.63 msec, 32.89 kpixels/msec:
Radius 15: 120.18 msec, 33.28 kpixels/msec:

And after:

Radius  1: 42.26 msec, 94.65 kpixels/msec:
Radius  2: 59.15 msec, 67.62 kpixels/msec:
Radius  3: 60.29 msec, 66.35 kpixels/msec:
Radius  4: 64.53 msec, 61.99 kpixels/msec:
Radius  5: 60.07 msec, 66.59 kpixels/msec:
Radius  6: 62.43 msec, 64.07 kpixels/msec:
Radius  7: 60.36 msec, 66.27 kpixels/msec:
Radius  8: 59.59 msec, 67.13 kpixels/msec:
Radius  9: 76.17 msec, 52.51 kpixels/msec:
Radius 10: 79.41 msec, 50.37 kpixels/msec:
Radius 11: 118.92 msec, 33.64 kpixels/msec:
Radius 12: 121.31 msec, 32.97 kpixels/msec:
Radius 13: 118.30 msec, 33.81 kpixels/msec:
Radius 14: 116.82 msec, 34.24 kpixels/msec:
Radius 15: 116.99 msec, 34.19 kpixels/msec:

I.e. almost double performance for the unrolled radius values.

https://bugzilla.gnome.org/show_bug.cgi?id=746468
2015-03-21 21:07:26 +01:00

256 lines
8.3 KiB
C

/* GTK - The GIMP Toolkit
*
* Copyright (C) 2014 Red Hat
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library. If not, see <http://www.gnu.org/licenses/>.
*
* Written by:
* Jasper St. Pierre <jstpierre@mecheye.net>
* Owen Taylor <otaylor@redhat.com>
*/
#include "gtkcairoblurprivate.h"
#include <math.h>
#include <string.h>
/*
 * Gets the size for a single box blur.
 *
 * Much of this, the 3 * sqrt(2 * pi) / 4, is the known value for
 * approximating a Gaussian using box blurs. This yields quite a good
 * approximation for a Gaussian. For more details, see:
 * http://www.w3.org/TR/SVG11/filters.html#feGaussianBlurElement
 * https://bugzilla.mozilla.org/show_bug.cgi?id=590039#c19
 *
 * The radius argument is parenthesized so that a compound expression
 * such as get_box_filter_size (a + b) is scaled as a whole. The scaled
 * value is truncated by the cast to int.
 */
#define GAUSSIAN_SCALE_FACTOR ((3.0 * sqrt(2 * G_PI) / 4))
#define get_box_filter_size(radius) ((int)(GAUSSIAN_SCALE_FACTOR * (radius)))
/* This applies a single box blur pass to a horizontal range of pixels;
 * since the box blur has the same weight for all pixels, we can
 * implement an efficient sliding window algorithm where we add
 * in pixels coming into the window from the right and remove
 * them when they leave the window to the left.
 *
 * row is blurred in place (the result is copied back over it);
 * tmp_buffer is scratch space that receives row_width bytes.
 *
 * d is the filter width; for even d shift indicates how the blurred
 * result is aligned with the original - does ' x ' go to ' yy' (shift=1)
 * or 'yy ' (shift=-1)
 */
static void
blur_xspan (guchar *row,
            guchar *tmp_buffer,
            int     row_width,
            int     d,
            int     shift)
{
  int offset;
  int sum = 0;
  int i;

  /* A filter width of zero would divide by zero and a negative one
   * would index outside the row, so refuse both; an empty row has
   * nothing to blur.
   */
  if (d <= 0 || row_width <= 0)
    return;

  if (d % 2 == 1)
    offset = d / 2;
  else
    offset = (d - shift) / 2;

  /* All the conditionals in here look slow, but the branches will
   * be well predicted and there are enough different possibilities
   * that trying to write this as a series of unconditional loops
   * is hard and not an obvious win. The main slow down here seems
   * to be the integer division per pixel; one possible optimization
   * would be to accumulate into two 16-bit integer buffers and
   * only divide down after all three passes. (SSE parallel implementation
   * of the divide step is possible.)
   */
#define BLUR_ROW_KERNEL(D)                                        \
  do                                                              \
    {                                                             \
      for (i = -(D) + offset; i < row_width + offset; i++)        \
        {                                                         \
          /* Pixel entering the window on the right. */           \
          if (i >= 0 && i < row_width)                            \
            sum += row[i];                                        \
                                                                  \
          if (i >= offset)                                        \
            {                                                     \
              /* Pixel leaving the window on the left. */         \
              if (i >= (D))                                       \
                sum -= row[i - (D)];                              \
                                                                  \
              /* Rounded average over the window. */              \
              tmp_buffer[i - offset] = (sum + (D) / 2) / (D);     \
            }                                                     \
        }                                                         \
    }                                                             \
  while (0)

  /* We unroll the values for d for radius 2-10 to avoid a generic
   * divide operation (not radius 1, because it's a no-op) */
  switch (d)
    {
    case get_box_filter_size (2):  BLUR_ROW_KERNEL (get_box_filter_size (2));  break;
    case get_box_filter_size (3):  BLUR_ROW_KERNEL (get_box_filter_size (3));  break;
    case get_box_filter_size (4):  BLUR_ROW_KERNEL (get_box_filter_size (4));  break;
    case get_box_filter_size (5):  BLUR_ROW_KERNEL (get_box_filter_size (5));  break;
    case get_box_filter_size (6):  BLUR_ROW_KERNEL (get_box_filter_size (6));  break;
    case get_box_filter_size (7):  BLUR_ROW_KERNEL (get_box_filter_size (7));  break;
    case get_box_filter_size (8):  BLUR_ROW_KERNEL (get_box_filter_size (8));  break;
    case get_box_filter_size (9):  BLUR_ROW_KERNEL (get_box_filter_size (9));  break;
    case get_box_filter_size (10): BLUR_ROW_KERNEL (get_box_filter_size (10)); break;
    default:                       BLUR_ROW_KERNEL (d);                        break;
    }

#undef BLUR_ROW_KERNEL

  memcpy (row, tmp_buffer, row_width);
}
/* Blurs each of the buffer_height rows of dst_buffer in place, running
 * three box blur passes per row. Rows are buffer_width bytes apart and
 * tmp_buffer is handed to blur_xspan() as its scratch row.
 */
static void
blur_rows (guchar *dst_buffer,
           guchar *tmp_buffer,
           int     buffer_width,
           int     buffer_height,
           int     d)
{
  int pass_width[3];
  int pass_shift[3];
  int y;
  int pass;

  /* We want to produce a symmetric blur that spreads a pixel
   * equally far to the left and right. If d is odd that happens
   * naturally, but for d even, we approximate by using a blur
   * on either side and then a centered blur of size d + 1.
   * (technique also from the SVG specification)
   */
  if (d % 2 == 1)
    {
      pass_width[0] = d;
      pass_shift[0] = 0;
      pass_width[1] = d;
      pass_shift[1] = 0;
      pass_width[2] = d;
      pass_shift[2] = 0;
    }
  else
    {
      pass_width[0] = d;
      pass_shift[0] = 1;
      pass_width[1] = d;
      pass_shift[1] = -1;
      pass_width[2] = d + 1;
      pass_shift[2] = 0;
    }

  for (y = 0; y < buffer_height; y++)
    {
      guchar *line = dst_buffer + y * buffer_width;

      for (pass = 0; pass < 3; pass++)
        blur_xspan (line, tmp_buffer, buffer_width,
                    pass_width[pass], pass_shift[pass]);
    }
}
/* Transposes a byte image (swaps width and height): the byte found at
 * row y, column x of src_buffer is stored at row x, column y of
 * dst_buffer.
 */
static void
flip_buffer (guchar *dst_buffer,
             guchar *src_buffer,
             int     width,
             int     height)
{
  /* Edge length of the square tiles the image is walked in. Working
   * tile by tile is kinder to the cache than reading or writing an
   * entire column at once.
   */
  enum { TILE_EDGE = 16 };
  int tile_x, tile_y;

  for (tile_x = 0; tile_x < width; tile_x += TILE_EDGE)
    {
      for (tile_y = 0; tile_y < height; tile_y += TILE_EDGE)
        {
          int y_end = tile_y + TILE_EDGE;
          int x_end = tile_x + TILE_EDGE;
          int x, y;

          /* Clamp the tile to the image on the last row/column of tiles. */
          if (!(y_end < height))
            y_end = height;
          if (!(x_end < width))
            x_end = width;

          for (x = tile_x; x < x_end; x++)
            {
              for (y = tile_y; y < y_end; y++)
                dst_buffer[x * height + y] = src_buffer[y * width + x];
            }
        }
    }
}
/* Blurs a width x height byte image in place, first along its columns
 * and then along its rows, using the box filter size derived from
 * radius. Columns are processed by transposing the image so that the
 * row blur can be reused.
 */
static void
_boxblur (guchar *buffer,
          int     width,
          int     height,
          int     radius)
{
  const int box_size = get_box_filter_size (radius);
  guchar *transposed = g_malloc (width * height);

  /* Vertical pass: transpose into the temporary image so columns become
   * rows, then blur those rows. The original buffer doubles as the
   * scratch row for the blur at this point.
   */
  flip_buffer (transposed, buffer, width, height);
  blur_rows (transposed, buffer, height, width, box_size);

  /* Horizontal pass: transpose back into the caller's buffer and blur
   * its rows, this time with the temporary image as scratch space.
   */
  flip_buffer (buffer, transposed, height, width);
  blur_rows (buffer, transposed, width, height, box_size);

  g_free (transposed);
}
/*
 * _gtk_cairo_blur_surface:
 * @surface: a cairo image surface; must be of format CAIRO_FORMAT_A8.
 * @radius_d: the blur radius; it is truncated to an integer.
 *
 * Blurs the cairo image surface at the given radius.
 *
 * Nothing is done when the truncated radius is 1 or less: radius 1
 * produces a box filter of size 1, which leaves every pixel unchanged,
 * and zero or negative radii have nothing to blur.
 */
void
_gtk_cairo_blur_surface (cairo_surface_t* surface,
                         double           radius_d)
{
  int radius = radius_d;

  g_return_if_fail (surface != NULL);
  g_return_if_fail (cairo_surface_get_type (surface) == CAIRO_SURFACE_TYPE_IMAGE);
  g_return_if_fail (cairo_image_surface_get_format (surface) == CAIRO_FORMAT_A8);

  /* Bail out before touching the surface. A negative radius must never
   * reach the blur code: it would yield a non-positive box filter size,
   * leading to out-of-bounds accesses and a division by zero there.
   */
  if (radius <= 1)
    return;

  /* Before we mess with the surface, execute any pending drawing. */
  cairo_surface_flush (surface);

  /* The stride is passed as the row width, so the blur runs over whole
   * rows of the surface data. */
  _boxblur (cairo_image_surface_get_data (surface),
            cairo_image_surface_get_stride (surface),
            cairo_image_surface_get_height (surface),
            radius);

  /* Inform cairo we altered the surface contents. */
  cairo_surface_mark_dirty (surface);
}
/*
 * _gtk_cairo_blur_compute_pixels:
 * @radius: the radius to compute the pixels for
 *
 * Computes how many pixels an image must be extended by in one
 * direction so that it can hold the image together with its shadow.
 *
 * Only the pixels added by the blur radius are counted; shadow offset
 * and spread are not included.
 *
 * The 3 * sqrt(2 * pi) / 4 factor is the known value for approximating
 * a Gaussian using box blurs, which yields quite a good approximation.
 * It is multiplied by 1.5 because our code wants the radius of the
 * entire triple-box-blur kernel instead of the diameter of an
 * individual box blur. For more details, see:
 * http://www.w3.org/TR/SVG11/filters.html#feGaussianBlurElement
 * https://bugzilla.mozilla.org/show_bug.cgi?id=590039#c19
 *
 * Returns: the kernel radius, rounded to the nearest integer.
 */
int
_gtk_cairo_blur_compute_pixels (double radius)
{
  const double kernel_radius = radius * GAUSSIAN_SCALE_FACTOR * 1.5;

  /* Adding one half before flooring rounds to the nearest integer. */
  return floor (kernel_radius + 0.5);
}