mirror of
https://gitlab.gnome.org/GNOME/gtk.git
synced 2025-01-13 22:10:08 +00:00
Fix and uncomment non-MMX version.
Thu Jan 3 19:31:58 2002 Owen Taylor <otaylor@redhat.com> * pixops/pixops.c (scale_line_22_33): Fix and uncomment non-MMX version. * pixops/pixops.c (pixops_composite_nearest): Remove a division. * pixops/pixops.c (pixops_composite): Add some docs about the parameters. * pixops/README: Add notes about the correct algorithms for alpha compositing and how to implement them quickly.
This commit is contained in:
parent
317ba5b267
commit
c96a394cc2
@ -1,3 +1,17 @@
|
||||
Thu Jan 3 19:31:58 2002 Owen Taylor <otaylor@redhat.com>
|
||||
|
||||
* pixops/pixops.c (scale_line_22_33): Fix and uncomment
|
||||
non-MMX version.
|
||||
|
||||
* pixops/pixops.c (pixops_composite_nearest): Remove a
|
||||
division.
|
||||
|
||||
* pixops/pixops.c (pixops_composite): Add some docs
|
||||
about the parameters.
|
||||
|
||||
* pixops/README: Add notes about the correct algorithms
|
||||
for alpha compositing and how to implement them quickly.
|
||||
|
||||
2001-12-29 Tor Lillqvist <tml@iki.fi>
|
||||
|
||||
* Makefile.am (EXTRA_DIST): Add makefile.msc.
|
||||
|
@ -33,6 +33,61 @@ for a number of the most common special cases:
|
||||
compositing from RGBA to RGBx
|
||||
compositing against a color from RGBA and storing in a RGBx buffer
|
||||
|
||||
Alpha compositing 8 bit RaGaBaAa onto RbGbBb is defined in terms of
|
||||
rounding the exact result (real values in [0,1]):
|
||||
|
||||
cc = ca * aa + (1 - aa) * cb
|
||||
|
||||
Cc = ROUND [255. * (Ca/255. * Aa/255. + (1 - Aa/255.) * Cb/255.)]
|
||||
|
||||
We can compute this exactly using only integer arithmetic.
|
||||
|
||||
ROUND(i / 255.) can be computed exactly for i in [0,255*255] as:
|
||||
|
||||
t = i + 0x80; result = (t + (t >> 8)) >> 8; [ call this as To8(i) ]
|
||||
|
||||
So,
|
||||
|
||||
t = Ca * Aa + (255 - Aa) * Cb + 0x80;
|
||||
Cc = (t + (t >> 8)) >> 8;
|
||||
|
||||
Alpha compositing 8 bit RaGaBaAa onto RbGbBbAb is a little harder, for
|
||||
non-premultiplied alpha. The premultiplied result is simple:
|
||||
|
||||
ac = aa + (1 - aa) * ab
|
||||
cc = ca + (1 - aa) * cb
|
||||
|
||||
Which can be computed in integer terms as:
|
||||
|
||||
Cc = Ca + To8 ((255 - Aa) * Cb)
|
||||
Ac = Aa + To8 ((255 - Aa) * Ab)
|
||||
|
||||
For non-premultiplied alpha, we need to divide the color components by
|
||||
the alpha:
|
||||
|
||||
+- (ca * aa + (1 - aa) * ab * cb) / ac; aa != 0
|
||||
cc = |
|
||||
+- cb; aa == 0
|
||||
|
||||
To calculate this as an integer, we note the alternate form:
|
||||
|
||||
cc = cb + aa * (ca - cb) / ac
|
||||
|
||||
[ 'cc = ca + (ac - aa) * (cb - ca) / ac' can also be useful numerically,
|
||||
but isn't important here ]
|
||||
|
||||
We can express this as integers as:
|
||||
|
||||
Ac_tmp = Aa * 255 + (255 - Aa) * Ab;
|
||||
|
||||
+- Cb + (255 * Aa * (Ca - Cb) + Ac_tmp / 2) / Ac_tmp ; Ca > Cb
|
||||
Cc = |
|
||||
+- Cb - (255 * Aa * (Cb - Ca) + Ac_tmp / 2) / Ac_tmp ; Ca <= Cb
|
||||
|
||||
Or, playing bit tricks to avoid the conditional
|
||||
|
||||
Cc = Cb + (255 * Aa * (Ca - Cb) + (((Ca - Cb) >> 8) ^ (Ac_tmp / 2)) ) / Ac_tmp
|
||||
|
||||
TODO
|
||||
====
|
||||
|
||||
@ -57,13 +112,13 @@ TODO
|
||||
the _nearest() variants do it right, most of the other code
|
||||
is wrong to some degree or another.
|
||||
|
||||
For instance, in composite line, we have:
|
||||
For instance, in composite_line_22_4a4(), we have:
|
||||
|
||||
dest[0] = ((0xff0000 - a) * dest[0] + r) >> 24;
|
||||
|
||||
if a is 0, then we have:
|
||||
if a is 0 (implies r == 0), then we have:
|
||||
|
||||
(0xff0000 * dest[0] + r) >> 24
|
||||
(0xff0000 * dest[0]) >> 24
|
||||
|
||||
which gives results which are 1 too low:
|
||||
|
||||
|
@ -202,10 +202,14 @@ pixops_composite_nearest (guchar *dest_buf,
|
||||
else
|
||||
{
|
||||
unsigned int a1 = 0xff - a0;
|
||||
|
||||
dest[0] = (a0 * p[0] + a1 * dest[0]) / 0xff;
|
||||
dest[1] = (a0 * p[1] + a1 * dest[1]) / 0xff;
|
||||
dest[2] = (a0 * p[2] + a1 * dest[2]) / 0xff;
|
||||
unsigned int tmp;
|
||||
|
||||
tmp = a0 * p[0] + a1 * dest[0] + 0x80;
|
||||
dest[0] = (tmp + (tmp >> 8)) >> 8;
|
||||
tmp = a0 * p[1] + a1 * dest[1] + 0x80;
|
||||
dest[1] = (tmp + (tmp >> 8)) >> 8;
|
||||
tmp = a0 * p[2] + a1 * dest[2] + 0x80;
|
||||
dest[2] = (tmp + (tmp >> 8)) >> 8;
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -388,7 +392,7 @@ composite_line (int *weights, int n_x, int n_y,
|
||||
int *pixel_weights;
|
||||
|
||||
pixel_weights = weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS)) & SUBSAMPLE_MASK) * n_x * n_y;
|
||||
|
||||
|
||||
for (i=0; i<n_y; i++)
|
||||
{
|
||||
guchar *q = src[i] + x_scaled * src_channels;
|
||||
@ -837,10 +841,9 @@ scale_line_22_33_mmx_stub (int *weights, int n_x, int n_y,
|
||||
}
|
||||
#endif /* USE_MMX */
|
||||
|
||||
#ifdef SCALE_LINE_22_33_USED /* This dead code would need changes if we wanted to use it */
|
||||
static guchar *
|
||||
scale_line_22_33 (int *weights, int n_x, int n_y,
|
||||
guchar *dest, guchar *dest_end, int dest_channels, int dest_has_alpha,
|
||||
guchar *dest, int dest_x, guchar *dest_end, int dest_channels, int dest_has_alpha,
|
||||
guchar **src, int src_channels, gboolean src_has_alpha,
|
||||
int x_init, int x_step, int src_width,
|
||||
int check_size, guint32 color1, guint32 color2)
|
||||
@ -860,8 +863,8 @@ scale_line_22_33 (int *weights, int n_x, int n_y,
|
||||
q0 = src0 + x_scaled * 3;
|
||||
q1 = src1 + x_scaled * 3;
|
||||
|
||||
pixel_weights = (int *)((char *)weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS - 4)) & (SUBSAMPLE_MASK << 4)));
|
||||
|
||||
pixel_weights = weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS)) & SUBSAMPLE_MASK) * 4;
|
||||
|
||||
w1 = pixel_weights[0];
|
||||
w2 = pixel_weights[1];
|
||||
w3 = pixel_weights[2];
|
||||
@ -883,9 +886,9 @@ scale_line_22_33 (int *weights, int n_x, int n_y,
|
||||
g += w4 * q1[5];
|
||||
b += w4 * q1[6];
|
||||
|
||||
dest[0] = r >> 16;
|
||||
dest[1] = g >> 16;
|
||||
dest[2] = b >> 16;
|
||||
dest[0] = (r + 0x8000) >> 16;
|
||||
dest[1] = (g + 0x8000) >> 16;
|
||||
dest[2] = (b + 0x8000) >> 16;
|
||||
|
||||
dest += 3;
|
||||
|
||||
@ -894,7 +897,6 @@ scale_line_22_33 (int *weights, int n_x, int n_y,
|
||||
|
||||
return dest;
|
||||
}
|
||||
#endif /* SCALE_LINE_22_33_USED */
|
||||
|
||||
static void
|
||||
process_pixel (int *weights, int n_x, int n_y,
|
||||
@ -1227,7 +1229,7 @@ bilinear_make_fast_weights (PixopsFilter *filter, double x_scale, double y_scale
|
||||
|
||||
for (i = 0; i < n_y; i++)
|
||||
for (j = 0; j < n_x; j++)
|
||||
*(pixel_weights + n_x * i + j) = 65536 * x_weights[j] * x_scale * y_weights[i] * y_scale * overall_alpha;
|
||||
*(pixel_weights + n_x * i + j) = 65536 * x_weights[j] * x_scale * y_weights[i] * y_scale * overall_alpha + 0.5;
|
||||
}
|
||||
|
||||
g_free (x_weights);
|
||||
@ -1412,6 +1414,30 @@ pixops_composite_color (guchar *dest_buf,
|
||||
g_free (filter.weights);
|
||||
}
|
||||
|
||||
/**
|
||||
* pixops_composite:
|
||||
* @dest_buf: pointer to location to store result
|
||||
* @render_x0: x0 of region of scaled source to store into @dest_buf
|
||||
* @render_y0: y0 of region of scaled source to store into @dest_buf
|
||||
* @render_x1: x1 of region of scaled source to store into @dest_buf
|
||||
* @render_y1: y1 of region of scaled source to store into @dest_buf
|
||||
* @dest_rowstride: rowstride of @dest_buf
|
||||
* @dest_channels: number of channels in @dest_buf
|
||||
* @dest_has_alpha: whether @dest_buf has alpha
|
||||
* @src_buf: pointer to source pixels
|
||||
* @src_width: width of source (used for clipping)
|
||||
* @src_height: height of source (used for clipping)
|
||||
* @src_rowstride: rowstride of source
|
||||
* @src_channels: number of channels in @src_buf
|
||||
* @src_has_alpha: whether @src_buf has alpha
|
||||
* @scale_x: amount to scale source by in X direction
|
||||
* @scale_y: amount to scale source by in Y direction
|
||||
* @interp_type: type of interpolation
|
||||
* @overall_alpha: overall alpha factor to multiply source by
|
||||
*
|
||||
* Scale source buffer by scale_x / scale_y, then composite a given rectangle
|
||||
* of the result into the destination buffer.
|
||||
**/
|
||||
void
|
||||
pixops_composite (guchar *dest_buf,
|
||||
int render_x0,
|
||||
@ -1550,12 +1576,16 @@ pixops_scale (guchar *dest_buf,
|
||||
break;
|
||||
}
|
||||
|
||||
if (filter.n_x == 2 && filter.n_y == 2 && dest_channels == 3 && src_channels == 3)
|
||||
{
|
||||
#ifdef USE_MMX
|
||||
if (filter.n_x == 2 && filter.n_y == 2 &&
|
||||
found_mmx && dest_channels == 3 && src_channels == 3)
|
||||
line_func = scale_line_22_33_mmx_stub;
|
||||
if (found_mmx)
|
||||
line_func = scale_line_22_33_mmx_stub;
|
||||
else
|
||||
#endif
|
||||
line_func = scale_line_22_33;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
line_func = scale_line;
|
||||
|
||||
pixops_process (dest_buf, render_x0, render_y0, render_x1, render_y1,
|
||||
|
@ -55,6 +55,12 @@ _pixops_scale_line_22_33_mmx:
|
||||
cmpl %esi,28(%ebp)
|
||||
je .out
|
||||
|
||||
/* For the body of this loop, %mm0, %mm1, %mm2, %mm3 hold the 4 adjoining
|
||||
* points we are interpolating between, as:
|
||||
*
|
||||
* 000000BB00GG00RR
|
||||
*/
|
||||
|
||||
/* Load initial values into %mm1, %mm3 */
|
||||
leal (%edx,%edx,2),%edx # Multiply by 3
|
||||
|
||||
@ -82,11 +88,16 @@ _pixops_scale_line_22_33_mmx:
|
||||
jmp .newx
|
||||
.p2align 4,,7
|
||||
.loop:
|
||||
/* int x_index = (x & 0xf000) >> 12 */
|
||||
/* short *pixel_weights = weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS)) & SUBSAMPLE_MASK) * n_x * n_y
|
||||
* 16 4 0xf 2 2
|
||||
*/
|
||||
movl %ebx,%eax
|
||||
andl $0xf000,%eax
|
||||
shrl $7,%eax
|
||||
|
||||
/* At this point, %edi holds weights. Load the 4 weights into %mm4,%mm5,%mm6,%mm7, multiply and
|
||||
* accumulate.
|
||||
*/
|
||||
movq (%edi,%eax),%mm4
|
||||
pmullw %mm0,%mm4
|
||||
movq 8(%edi,%eax),%mm5
|
||||
@ -99,7 +110,17 @@ _pixops_scale_line_22_33_mmx:
|
||||
paddw %mm6, %mm7
|
||||
paddw %mm5, %mm7
|
||||
|
||||
/* %mm7 holds the accumulated sum. Compute (C + 0x80) / 256
|
||||
*/
|
||||
pxor %mm4, %mm4
|
||||
movl $8421504, %eax # 0x00808080
|
||||
movd %eax, %mm6
|
||||
punpcklbw %mm4, %mm6
|
||||
paddw %mm6, %mm7
|
||||
psrlw $8, %mm7
|
||||
|
||||
/* Pack into %eax and store result
|
||||
*/
|
||||
packuswb %mm7, %mm7
|
||||
movd %mm7, %eax
|
||||
|
||||
@ -113,7 +134,7 @@ _pixops_scale_line_22_33_mmx:
|
||||
|
||||
/* x += x_step; */
|
||||
addl 24(%ebp),%ebx
|
||||
/* x_scale = x >> 16; */
|
||||
/* x_scaled = x >> 16; */
|
||||
movl %ebx,%edx
|
||||
sarl $16,%edx
|
||||
|
||||
@ -131,7 +152,6 @@ _pixops_scale_line_22_33_mmx:
|
||||
leal (%edx,%edx,2),%edx # Multiply by 3
|
||||
|
||||
movl 16(%ebp),%edi
|
||||
pxor %mm4, %mm4
|
||||
movzbl 2(%edi,%edx),%ecx
|
||||
shll $16,%ecx
|
||||
movzwl (%edi,%edx),%eax
|
||||
|
Loading…
Reference in New Issue
Block a user