diff --git a/demos/testpixbuf-scale.c b/demos/testpixbuf-scale.c new file mode 100644 index 0000000000..c2891bd07e --- /dev/null +++ b/demos/testpixbuf-scale.c @@ -0,0 +1,163 @@ +#include +#include "gdk-pixbuf.h" + +#include + +ArtFilterLevel filter_level = ART_FILTER_BILINEAR; +int overall_alpha = 255; +GdkPixbuf *pixbuf; +GtkWidget *darea; + +void +set_filter_level (GtkWidget *widget, gpointer data) +{ + filter_level = GPOINTER_TO_UINT (data); + gtk_widget_queue_draw (darea); +} + +void +overall_changed_cb (GtkAdjustment *adjustment, gpointer data) +{ + if (adjustment->value != overall_alpha) + { + overall_alpha = adjustment->value; + gtk_widget_queue_draw (darea); + } +} + +gboolean +expose_cb (GtkWidget *widget, GdkEventExpose *event, gpointer data) +{ + GdkPixbuf *dest; + + dest = gdk_pixbuf_new (ART_PIX_RGB, FALSE, 8, event->area.width, event->area.height); + + if (pixbuf->art_pixbuf->has_alpha || overall_alpha != 255) + gdk_pixbuf_composite_color (pixbuf, dest, + 0, 0, event->area.width, event->area.height, + -event->area.x, -event->area.y, + (double) widget->allocation.width / pixbuf->art_pixbuf->width, + (double) widget->allocation.height / pixbuf->art_pixbuf->height, + filter_level, overall_alpha, + event->area.x, event->area.y, 16, 0xaaaaaa, 0x555555); + else + gdk_pixbuf_scale (pixbuf, dest, + 0, 0, event->area.width, event->area.height, + -event->area.x, -event->area.y, + (double) widget->allocation.width / pixbuf->art_pixbuf->width, + (double) widget->allocation.height / pixbuf->art_pixbuf->height, + filter_level); + + + gdk_pixbuf_render_to_drawable (dest, widget->window, widget->style->fg_gc[GTK_STATE_NORMAL], + 0, 0, event->area.x, event->area.y, + event->area.width, event->area.height, + GDK_RGB_DITHER_NORMAL, event->area.x, event->area.y); + + gdk_pixbuf_unref (dest); + + return TRUE; +} + +int +main(int argc, char **argv) +{ + GtkWidget *window, *vbox; + GtkWidget *menuitem, *optionmenu, *menu; + GtkWidget *alignment; + GtkWidget *hbox, *label, 
*hscale; + GtkAdjustment *adjustment; + GtkRequisition scratch_requisition; + + gtk_init (&argc, &argv); + gdk_rgb_init (); + + if (argc != 2) { + fprintf (stderr, "Usage: testpixbuf-scale FILE\n"); + exit (1); + } + + pixbuf = gdk_pixbuf_new_from_file (argv[1]); + if (!pixbuf) { + fprintf (stderr, "Cannot load %s\n", argv[1]); + exit(1); + } + + window = gtk_window_new (GTK_WINDOW_TOPLEVEL); + gtk_signal_connect (GTK_OBJECT (window), "destroy", + GTK_SIGNAL_FUNC (gtk_main_quit), NULL); + + vbox = gtk_vbox_new (FALSE, 0); + gtk_container_add (GTK_CONTAINER (window), vbox); + + menu = gtk_menu_new (); + + menuitem = gtk_menu_item_new_with_label ("NEAREST"); + gtk_signal_connect (GTK_OBJECT (menuitem), "activate", + GTK_SIGNAL_FUNC (set_filter_level), + GUINT_TO_POINTER (ART_FILTER_NEAREST)); + gtk_widget_show (menuitem); + gtk_container_add (GTK_CONTAINER (menu), menuitem); + + menuitem = gtk_menu_item_new_with_label ("BILINEAR"); + gtk_signal_connect (GTK_OBJECT (menuitem), "activate", + GTK_SIGNAL_FUNC (set_filter_level), + GUINT_TO_POINTER (ART_FILTER_BILINEAR)); + gtk_widget_show (menuitem); + gtk_container_add (GTK_CONTAINER (menu), menuitem); + + menuitem = gtk_menu_item_new_with_label ("TILES"); + gtk_signal_connect (GTK_OBJECT (menuitem), "activate", + GTK_SIGNAL_FUNC (set_filter_level), + GUINT_TO_POINTER (ART_FILTER_TILES)); + gtk_container_add (GTK_CONTAINER (menu), menuitem); + + menuitem = gtk_menu_item_new_with_label ("HYPER"); + gtk_signal_connect (GTK_OBJECT (menuitem), "activate", + GTK_SIGNAL_FUNC (set_filter_level), + GUINT_TO_POINTER (ART_FILTER_HYPER)); + gtk_container_add (GTK_CONTAINER (menu), menuitem); + + optionmenu = gtk_option_menu_new (); + gtk_option_menu_set_menu (GTK_OPTION_MENU (optionmenu), menu); + gtk_option_menu_set_history (GTK_OPTION_MENU (optionmenu), 1); + + alignment = gtk_alignment_new (0.0, 0.0, 0.0, 0.5); + gtk_box_pack_start (GTK_BOX (vbox), alignment, FALSE, FALSE, 0); + + hbox = gtk_hbox_new (FALSE, 4); + 
gtk_box_pack_start (GTK_BOX (vbox), hbox, FALSE, FALSE, 0); + + label = gtk_label_new ("Overall Alpha:"); + gtk_box_pack_start (GTK_BOX (hbox), label, FALSE, FALSE, 0); + + adjustment = GTK_ADJUSTMENT (gtk_adjustment_new (overall_alpha, 0, 255, 1, 10, 0)); + gtk_signal_connect (GTK_OBJECT (adjustment), "value_changed", + GTK_SIGNAL_FUNC (overall_changed_cb), NULL); + + hscale = gtk_hscale_new (adjustment); + gtk_scale_set_digits (GTK_SCALE (hscale), 0); + gtk_box_pack_start (GTK_BOX (hbox), hscale, TRUE, TRUE, 0); + + gtk_container_add (GTK_CONTAINER (alignment), optionmenu); + gtk_widget_show_all (vbox); + + /* Compute the size without the drawing area, so we know how big to make the default size */ + gtk_widget_size_request (vbox, &scratch_requisition); + + darea = gtk_drawing_area_new (); + gtk_box_pack_start (GTK_BOX (vbox), darea, TRUE, TRUE, 0); + + gtk_signal_connect (GTK_OBJECT (darea), "expose_event", + GTK_SIGNAL_FUNC (expose_cb), NULL); + + gtk_window_set_default_size (GTK_WINDOW (window), + pixbuf->art_pixbuf->width, + scratch_requisition.height + pixbuf->art_pixbuf->height); + + gtk_widget_show_all (window); + + gtk_main (); + + return 0; +} diff --git a/gdk-pixbuf/.cvsignore b/gdk-pixbuf/.cvsignore index c5efdf6c68..6f2afc3601 100644 --- a/gdk-pixbuf/.cvsignore +++ b/gdk-pixbuf/.cvsignore @@ -7,4 +7,5 @@ Makefile *.lo testpixbuf testpixbuf-drawable +testpixbuf-scale testanimation diff --git a/gdk-pixbuf/ChangeLog b/gdk-pixbuf/ChangeLog index 3bb68cd30e..8cfd7be4cd 100644 --- a/gdk-pixbuf/ChangeLog +++ b/gdk-pixbuf/ChangeLog @@ -1,3 +1,17 @@ +2000-01-05 Owen Taylor + + * gdk-pixbuf/pixops/: Directory full of pixel data scaling + code that will eventually migrate into libart. + + * configure.in acconfig.h: Add checks for MMX compiler support + + * gdk-pixbuf/gdk-pixbuf.h gdk-pixbuf/gdk-pixbuf-scale.c: + Nice wrapper routines for the code in pixops that operate + on pixbufs instead of raw data. 
+ + * gdk-pixbuf/testpixbuf-scale: Test program for scaling + routines. + 2000-01-05 Jonathan Blandford * doc/tmpl/animation.sgml: Documentation changes. diff --git a/gdk-pixbuf/Makefile.am b/gdk-pixbuf/Makefile.am index 3dd9bf9f60..bf8caded64 100644 --- a/gdk-pixbuf/Makefile.am +++ b/gdk-pixbuf/Makefile.am @@ -1,3 +1,5 @@ +SUBDIRS = pixops + lib_LTLIBRARIES = \ libgdk_pixbuf.la @@ -38,7 +40,7 @@ libexec_LTLIBRARIES = \ $(PNM_LIB) \ $(BMP_LIB) -noinst_PROGRAMS = testpixbuf testpixbuf-drawable testanimation +noinst_PROGRAMS = testpixbuf testpixbuf-drawable testanimation testpixbuf-scale DEPS = libgdk_pixbuf.la INCLUDES = -I$(top_srcdir) -I$(top_builddir) \ @@ -52,10 +54,12 @@ LDADDS = libgdk_pixbuf.la $(LIBART_LIBS) $(GLIB_LIBS) $(GTK_LIBS) if INSIDE_GNOME_LIBS testpixbuf_LDADD = $(LDADDS) $(LIBART_LIBS) -lgmodule testpixbuf_drawable_LDADD = $(LDADDS) +testpixbuf_scale_LDADD = $(LDADDS) testanimation_LDADD = $(LDADDS) $(LIBART_LIBS) -lgmodule else testpixbuf_LDADD = $(LDADDS) $(LIBART_LIBS) $(GNOME_LIBS) -lgmodule testpixbuf_drawable_LDADD = $(LDADDS) $(GNOME_LIBS) +testpixbuf_scale_LDADD = $(LDADDS) $(GNOME_LIBS) testanimation_LDADD = $(LDADDS) $(LIBART_LIBS) $(GNOME_LIBS) -lgmodule endif @@ -83,6 +87,7 @@ libgdk_pixbuf_la_SOURCES = \ gdk-pixbuf-io.c \ gdk-pixbuf-loader.c \ gdk-pixbuf-render.c \ + gdk-pixbuf-scale.c \ gdk-pixbuf-util.c \ $(CANVAS_SOURCEFILES) @@ -93,6 +98,8 @@ EXTRA_GNOME_LIBS = $(GNOME_LIBS) endif libgdk_pixbuf_la_LDFLAGS = -version-info 1:0:0 $(EXTRA_GNOME_LIBS) +libgdk_pixbuf_la_LIBADD = pixops/libpixops.la + libgdk_pixbufinclude_HEADERS = \ gdk-pixbuf.h \ diff --git a/gdk-pixbuf/gdk-pixbuf-scale.c b/gdk-pixbuf/gdk-pixbuf-scale.c new file mode 100644 index 0000000000..6e0861f1e2 --- /dev/null +++ b/gdk-pixbuf/gdk-pixbuf-scale.c @@ -0,0 +1,214 @@ +#include "gdk-pixbuf.h" +#include "pixops/pixops.h" +#include "math.h" + +/** + * gdk_pixbuf_scale: + * @src: a #GdkPixbuf + * @dest: the #GdkPixbuf into which to render the results + * @dest_x: + * 
@dest_y: + * @dest_width: + * @dest_height: + * @offset_x: the offset in the X direction (currently rounded to an integer) + * @offset_y: the offset in the Y direction (currently rounded to an integer) + * @scale_x: the scale factor in the X direction + * @scale_y: the scale factor in the Y direction + * @filter_level: the filter quality for the transformation. + * + * Transforms the source image by scaling by @scale_x and @scale_y then + * translating by @offset_x and @offset_y, then renders the rectangle + * (@dest_x, @dest_y, @dest_width, @dest_height) of the resulting image onto the + * destination pixbuf, replacing the previous contents. + **/ +void +gdk_pixbuf_scale (GdkPixbuf *src, + GdkPixbuf *dest, + int dest_x, + int dest_y, + int dest_width, + int dest_height, + double offset_x, + double offset_y, + double scale_x, + double scale_y, + ArtFilterLevel filter_level) +{ + offset_x = floor(offset_x + 0.5); + offset_y = floor(offset_y + 0.5); + + pixops_scale (dest->art_pixbuf->pixels + dest_y * dest->art_pixbuf->rowstride + dest_x * dest->art_pixbuf->n_channels, + -offset_x, -offset_y, dest_width - offset_x, dest_height - offset_y, + dest->art_pixbuf->rowstride, dest->art_pixbuf->n_channels, dest->art_pixbuf->has_alpha, + src->art_pixbuf->pixels, src->art_pixbuf->width, src->art_pixbuf->height, + src->art_pixbuf->rowstride, src->art_pixbuf->n_channels, src->art_pixbuf->has_alpha, + scale_x, scale_y, filter_level); +} + +/** + * gdk_pixbuf_composite: + * @src: a #GdkPixbuf + * @dest: the #GdkPixbuf into which to render the results + * @dest_x: + * @dest_y: + * @dest_width: + * @dest_height: + * @offset_x: the offset in the X direction (currently rounded to an integer) + * @offset_y: the offset in the Y direction (currently rounded to an integer) + * @scale_x: the scale factor in the X direction + * @scale_y: the scale factor in the Y direction + * @filter_level: the filter quality for the transformation. 
+ * @overall_alpha: overall alpha for source image (0..255) + * + * Transforms the source image by scaling by @scale_x and @scale_y then + * translating by @offset_x and @offset_y, then composites the rectangle + * (@dest_x, @dest_y, @dest_width, @dest_height) of the resulting image onto the + * destination pixbuf. + **/ +void +gdk_pixbuf_composite (GdkPixbuf *src, + GdkPixbuf *dest, + int dest_x, + int dest_y, + int dest_width, + int dest_height, + double offset_x, + double offset_y, + double scale_x, + double scale_y, + ArtFilterLevel filter_level, + int overall_alpha) +{ + offset_x = floor(offset_x + 0.5); + offset_y = floor(offset_y + 0.5); + pixops_composite (dest->art_pixbuf->pixels + dest_y * dest->art_pixbuf->rowstride + dest_x * dest->art_pixbuf->n_channels, + -offset_x, -offset_y, dest_width - offset_x, dest_height - offset_y, + dest->art_pixbuf->rowstride, dest->art_pixbuf->n_channels, dest->art_pixbuf->has_alpha, + src->art_pixbuf->pixels, src->art_pixbuf->width, src->art_pixbuf->height, + src->art_pixbuf->rowstride, src->art_pixbuf->n_channels, src->art_pixbuf->has_alpha, + scale_x, scale_y, filter_level, overall_alpha); +} + +/** + * gdk_pixbuf_composite_color: + * @src: a #GdkPixbuf + * @dest: the #GdkPixbuf into which to render the results + * @dest_x: + * @dest_y: + * @dest_width: + * @dest_height: + * @offset_x: the offset in the X direction (currently rounded to an integer) + * @offset_y: the offset in the Y direction (currently rounded to an integer) + * @scale_x: the scale factor in the X direction + * @scale_y: the scale factor in the Y direction + * @filter_level: the filter quality for the transformation. 
+ * @overall_alpha: overall alpha for source image (0..255) + * @check_x: the X offset for the checkboard (origin of checkboard is at -@check_x, -@check_y) + * @check_y: the Y offset for the checkboard + * @check_size: the size of checks in the checkboard (must be a power of two) + * @color1: the color of check at upper left + * @color2: the color of the other check + * + * Transforms the source image by scaling by @scale_x and @scale_y then + * translating by @offset_x and @offset_y, then composites the rectangle + * (@dest_x, @dest_y, @dest_width, @dest_height) of the resulting image with + * a checkboard of the colors @color1 and @color2 and renders it onto the + * destination pixbuf. + **/ +void +gdk_pixbuf_composite_color (GdkPixbuf *src, + GdkPixbuf *dest, + int dest_x, + int dest_y, + int dest_width, + int dest_height, + double offset_x, + double offset_y, + double scale_x, + double scale_y, + ArtFilterLevel filter_level, + int overall_alpha, + int check_x, + int check_y, + int check_size, + art_u32 color1, + art_u32 color2) +{ + offset_x = floor(offset_x + 0.5); + offset_y = floor(offset_y + 0.5); + + pixops_composite_color (dest->art_pixbuf->pixels + dest_y * dest->art_pixbuf->rowstride + dest_x * dest->art_pixbuf->n_channels, + -offset_x, -offset_y, dest_width - offset_x, dest_height - offset_y, + dest->art_pixbuf->rowstride, dest->art_pixbuf->n_channels, dest->art_pixbuf->has_alpha, + src->art_pixbuf->pixels, src->art_pixbuf->width, src->art_pixbuf->height, + src->art_pixbuf->rowstride, src->art_pixbuf->n_channels, src->art_pixbuf->has_alpha, + scale_x, scale_y, filter_level, overall_alpha, check_x, check_y, check_size, color1, color2); +} + +/** + * gdk_pixbuf_scale_simple: + * @src: a #GdkPixbuf + * @dest_width: the width of destination image + * @dest_height: the height of destination image + * @filter_level: the filter quality for the transformation. 
+ * + * Scale the #GdkPixbuf @src to @dest_width x @dest_height and render the result into + * a new #GdkPixbuf. + * + * Return value: the new #GdkPixbuf + **/ +GdkPixbuf * +gdk_pixbuf_scale_simple (GdkPixbuf *src, + int dest_width, + int dest_height, + ArtFilterLevel filter_level) +{ + GdkPixbuf *dest = gdk_pixbuf_new (ART_PIX_RGB, src->art_pixbuf->has_alpha, 8, dest_width, dest_height); + + gdk_pixbuf_scale (src, dest, 0, 0, dest_width, dest_height, 0, 0, + (double)dest_width / src->art_pixbuf->width, + (double)dest_height / src->art_pixbuf->height, + filter_level); + + return dest; +} + +/** + * gdk_pixbuf_composite_color_simple: + * @src: a #GdkPixbuf + * @dest_width: the width of destination image + * @dest_height: the height of destination image + * @filter_level: the filter quality for the transformation. + * @overall_alpha: overall alpha for source image (0..255) + * @check_size: the size of checks in the checkboard (must be a power of two) + * @color1: the color of check at upper left + * @color2: the color of the other check + * + * Scale the #GdkPixbuf @src to @dest_width x @dest_height composite the result with + * a checkboard of colors @color1 and @color2 and render the result into + * a new #GdkPixbuf. 
+ * + * Return value: the new #GdkPixbuf + **/ +GdkPixbuf * +gdk_pixbuf_composite_color_simple (GdkPixbuf *src, + int dest_width, + int dest_height, + ArtFilterLevel filter_level, + int overall_alpha, + int check_size, + art_u32 color1, + art_u32 color2) +{ + GdkPixbuf *dest = gdk_pixbuf_new (ART_PIX_RGB, src->art_pixbuf->has_alpha, 8, dest_width, dest_height); + + gdk_pixbuf_composite_color (src, dest, 0, 0, dest_width, dest_height, 0, 0, + (double)dest_width / src->art_pixbuf->width, + (double)dest_height / src->art_pixbuf->height, + filter_level, overall_alpha, 0, 0, check_size, color1, color2); + + return dest; +} + + + diff --git a/gdk-pixbuf/gdk-pixbuf.h b/gdk-pixbuf/gdk-pixbuf.h index 7a30f3c8ed..28e7f4da64 100644 --- a/gdk-pixbuf/gdk-pixbuf.h +++ b/gdk-pixbuf/gdk-pixbuf.h @@ -27,6 +27,7 @@ #include #include +#include #include #ifdef __cplusplus @@ -162,6 +163,62 @@ GdkPixbuf *gdk_pixbuf_get_from_drawable (GdkPixbuf *dest, int dest_x, int dest_y, int width, int height); +/* Scaling */ + +void gdk_pixbuf_scale (GdkPixbuf *src, + GdkPixbuf *dest, + int dest_x, + int dest_y, + int dest_width, + int dest_height, + double offset_x, + double offset_y, + double scale_x, + double scale_y, + ArtFilterLevel filter_level); +void gdk_pixbuf_composite (GdkPixbuf *src, + GdkPixbuf *dest, + int dest_x, + int dest_y, + int dest_width, + int dest_height, + double offset_x, + double offset_y, + double scale_x, + double scale_y, + ArtFilterLevel filter_level, + int overall_alpha); +void gdk_pixbuf_composite_color (GdkPixbuf *src, + GdkPixbuf *dest, + int dest_x, + int dest_y, + int dest_width, + int dest_height, + double offset_x, + double offset_y, + double scale_x, + double scale_y, + ArtFilterLevel filter_level, + int overall_alpha, + int check_x, + int check_y, + int check_size, + art_u32 color1, + art_u32 color2); + +GdkPixbuf *gdk_pixbuf_scale_simple (GdkPixbuf *src, + int dest_width, + int dest_height, + ArtFilterLevel filter_level); +GdkPixbuf 
*gdk_pixbuf_composite_color_simple (GdkPixbuf *src, + int dest_width, + int dest_height, + ArtFilterLevel filter_level, + int overall_alpha, + int check_size, + art_u32 color1, + art_u32 color2); + /* Animation support */ GdkPixbufAnimation *gdk_pixbuf_animation_new_from_file (const char *filename); diff --git a/gdk-pixbuf/pixops/.cvsignore b/gdk-pixbuf/pixops/.cvsignore new file mode 100644 index 0000000000..8027cd33b5 --- /dev/null +++ b/gdk-pixbuf/pixops/.cvsignore @@ -0,0 +1,7 @@ +Makefile +Makefile.in +.libs +.deps +*.lo +*.la +timescale diff --git a/gdk-pixbuf/pixops/Makefile.am b/gdk-pixbuf/pixops/Makefile.am new file mode 100644 index 0000000000..bcb90bc18e --- /dev/null +++ b/gdk-pixbuf/pixops/Makefile.am @@ -0,0 +1,22 @@ +noinst_LTLIBRARIES = libpixops.la + +INCLUDES = $(GLIB_CFLAGS) + +bin_PROGRAMS = timescale + +timescale_SOURCES = timescale.c +timescale_LDADD = libpixops.la $(GLIB_LIBS) + +if USE_MMX +mmx_sources = \ + have_mmx.S \ + scale_line_22_33_mmx.S \ + composite_line_22_4a4_mmx.S \ + composite_line_color_22_4a4_mmx.S +endif + +libpixops_la_SOURCES = \ + pixops.c \ + pixops.h \ + pixops-internal.h \ + $(mmx_sources) diff --git a/gdk-pixbuf/pixops/composite_line_22_4a4_mmx.S b/gdk-pixbuf/pixops/composite_line_22_4a4_mmx.S new file mode 100644 index 0000000000..f3edc8ab08 --- /dev/null +++ b/gdk-pixbuf/pixops/composite_line_22_4a4_mmx.S @@ -0,0 +1,208 @@ + .file "composite_line_22_4a4_mmx.S" + .version "01.01" +gcc2_compiled.: +.text + .align 16 +.globl pixops_composite_line_22_4a4_mmx + .type pixops_composite_line_22_4a4_mmx,@function +/* + * Arguments + * + * weights: 8(%ebp) + * p: 12(%ebp) %esi + * q1: 16(%ebp) + * q2: 20(%ebp) + * xstep: 24(%ebp) + * p_end: 28(%ebp) + * xinit: 32(%ebp) + * +*/ +pixops_composite_line_22_4a4_mmx: +/* + * Function call entry + */ + pushl %ebp + movl %esp,%ebp + subl $28,%esp + pushl %edi + pushl %esi + pushl %ebx +/* Locals: + * int x %ebx + * int x_scaled -24(%ebp) + */ + +/* + * Setup + */ +/* Initialize 
variables */ + movl 32(%ebp),%ebx + movl 32(%ebp),%edx + sarl $16,%edx + movl 12(%ebp),%esi + + movl %edx,-24(%ebp) + + cmpl %esi,28(%ebp) + je .out + +/* Load initial values into %mm1, %mm3 */ + shll $2, %edx + + pxor %mm4, %mm4 + + movl 16(%ebp),%edi + movl (%edi, %edx), %eax + movd (%edi, %edx), %mm5 + punpcklbw %mm4, %mm5 + shrl $24, %eax + movl $0x010101, %ecx + mull %ecx + orl $0xff000000, %eax + movd %eax, %mm1 + punpcklbw %mm4, %mm1 + pmullw %mm5,%mm1 + + movl -24(%ebp),%edx + shll $2, %edx + + movl 20(%ebp),%edi + movl (%edi, %edx), %eax + movd (%edi, %edx), %mm5 + punpcklbw %mm4, %mm5 + shrl $24, %eax + movl $0x010101, %ecx + mull %ecx + orl $0xff000000, %eax + movd %eax, %mm3 + punpcklbw %mm4, %mm3 + pmullw %mm5,%mm3 + + psrlw $8,%mm1 + psrlw $8,%mm3 + + addl $65536,%ebx + movl %ebx,%edx + sarl $16,%edx + + jmp .newx + .p2align 4,,7 +.loop: +/* int x_index = (x & 0xf000) >> 12 */ + movl %ebx,%eax + andl $0xf000,%eax + shrl $7,%eax + + movq (%edi,%eax),%mm4 + pmullw %mm0,%mm4 + movq 8(%edi,%eax),%mm5 + pmullw %mm1,%mm5 + movq 16(%edi,%eax),%mm6 + movq 24(%edi,%eax),%mm7 + pmullw %mm2,%mm6 + pmullw %mm3,%mm7 + paddw %mm4, %mm5 + paddw %mm6, %mm7 + paddw %mm5, %mm7 + + movl $0xffff,%ecx + movd %ecx,%mm4 + psllq $48,%mm4 + movq %mm4,%mm6 + psubw %mm7,%mm4 + pand %mm6,%mm4 + + movq %mm4,%mm5 + psrlq $16,%mm4 + por %mm4,%mm5 + psrlq $32,%mm5 + por %mm4,%mm5 + + psrlw $8,%mm5 + + movd (%esi),%mm7 + pxor %mm4,%mm4 + punpcklbw %mm4, %mm7 + + pmullw %mm7,%mm5 + +/* x += x_step; */ + addl 24(%ebp),%ebx +/* x_scale = x >> 16; */ + movl %ebx,%edx + sarl $16,%edx + + paddw %mm5,%mm6 + + psrlw $8,%mm6 + packuswb %mm6, %mm6 + movd %mm6,(%esi) + + addl $4, %esi + + cmpl %esi,28(%ebp) + je .out + + cmpl %edx,-24(%ebp) + je .loop + +.newx: + movl %edx,-24(%ebp) +/* + * Load the two new values into %mm1, %mm3, move old values into %mm0, %mm2 + */ + movq %mm1, %mm0 + movq %mm3, %mm2 + + shll $2, %edx + +# %mm4 will always be already clear here +# pxor %mm4, %mm4 + + movl 
16(%ebp),%edi + movl (%edi, %edx), %eax + movd (%edi, %edx), %mm5 + punpcklbw %mm4, %mm5 + shrl $24, %eax + movl $0x010101, %ecx + mull %ecx +/* + * mull destroyed %edx, need to reconstitute + */ + movl -24(%ebp),%edx + shll $2, %edx + + orl $0xff000000, %eax + movd %eax, %mm1 + punpcklbw %mm4, %mm1 + pmullw %mm5,%mm1 + + movl 20(%ebp),%edi + movl (%edi, %edx), %eax + movd (%edi, %edx), %mm5 + punpcklbw %mm4, %mm5 + shrl $24, %eax + movl $0x010101, %ecx + mull %ecx + orl $0xff000000, %eax + movd %eax, %mm3 + punpcklbw %mm4, %mm3 + pmullw %mm5,%mm3 + + psrlw $8,%mm1 + psrlw $8,%mm3 + + movl 8(%ebp),%edi + + jmp .loop + +.out: + movl %esi,%eax + emms + leal -40(%ebp),%esp + popl %ebx + popl %esi + popl %edi + movl %ebp,%esp + popl %ebp + ret diff --git a/gdk-pixbuf/pixops/composite_line_color_22_4a4_mmx.S b/gdk-pixbuf/pixops/composite_line_color_22_4a4_mmx.S new file mode 100644 index 0000000000..e3fd640d65 --- /dev/null +++ b/gdk-pixbuf/pixops/composite_line_color_22_4a4_mmx.S @@ -0,0 +1,219 @@ + .file "composite_line_color_22_4a4_mmx.S" + .version "01.01" +gcc2_compiled.: +.text + .align 16 +.globl pixops_composite_line_color_22_4a4_mmx + .type pixops_composite_line_color_22_4a4_mmx,@function +/* + * Arguments + * + * weights: 8(%ebp) + * p: 12(%ebp) %esi + * q1: 16(%ebp) + * q2: 20(%ebp) + * xstep: 24(%ebp) + * p_end: 28(%ebp) + * xinit: 32(%ebp) + * dest_x: 36(%ebp) + * check_shift: 40(%ebp) + * colors: 44(%ebp) + * +*/ +pixops_composite_line_color_22_4a4_mmx: +/* + * Function call entry + */ + pushl %ebp + movl %esp,%ebp + subl $28,%esp + pushl %edi + pushl %esi + pushl %ebx +/* Locals: + * int x %ebx + * int x_scaled -24(%ebp) + */ + +/* + * Setup + */ +/* Initialize variables */ + movl 32(%ebp),%ebx + movl 32(%ebp),%edx + sarl $16,%edx + movl 12(%ebp),%esi + + movl %edx,-24(%ebp) + + cmpl %esi,28(%ebp) + je .out + +/* Load initial values into %mm1, %mm3 */ + shll $2, %edx + + pxor %mm4, %mm4 + + movl 16(%ebp),%edi + movl (%edi, %edx), %eax + movd (%edi, %edx), 
%mm5 + punpcklbw %mm4, %mm5 + shrl $24, %eax + movl $0x010101, %ecx + mull %ecx + orl $0xff000000, %eax + movd %eax, %mm1 + punpcklbw %mm4, %mm1 + pmullw %mm5,%mm1 + +/* + * mull destroyed %edx, need to reconstitute + */ + movl -24(%ebp),%edx + shll $2, %edx + + movl 20(%ebp),%edi + movl (%edi, %edx), %eax + movd (%edi, %edx), %mm5 + punpcklbw %mm4, %mm5 + shrl $24, %eax + movl $0x010101, %ecx + mull %ecx + orl $0xff000000, %eax + movd %eax, %mm3 + punpcklbw %mm4, %mm3 + pmullw %mm5,%mm3 + + psrlw $8,%mm1 + psrlw $8,%mm3 + + addl $65536,%ebx + movl %ebx,%edx + sarl $16,%edx + + jmp .newx + .p2align 4,,7 +.loop: +/* int x_index = (x & 0xf000) >> 12 */ + movl %ebx,%eax + andl $0xf000,%eax + shrl $7,%eax + + movq (%edi,%eax),%mm4 + pmullw %mm0,%mm4 + movq 8(%edi,%eax),%mm5 + pmullw %mm1,%mm5 + movq 16(%edi,%eax),%mm6 + movq 24(%edi,%eax),%mm7 + pmullw %mm2,%mm6 + pmullw %mm3,%mm7 + paddw %mm4, %mm5 + paddw %mm6, %mm7 + paddw %mm5, %mm7 + + movl $0xffff,%ecx + movd %ecx,%mm4 + psllq $48,%mm4 + movq %mm4,%mm6 + psubw %mm7,%mm4 + pand %mm6,%mm4 + + movq %mm4,%mm5 + psrlq $16,%mm4 + por %mm4,%mm5 + psrlq $32,%mm5 + por %mm4,%mm5 + + psrlw $8,%mm5 + + movl 36(%ebp),%eax + incl 36(%ebp) + + movl 40(%ebp),%ecx + shrl %cl,%eax + andl $1,%eax + + movl 44(%ebp),%ecx + movq (%ecx,%eax,8),%mm6 + + pmullw %mm6,%mm5 + +/* x += x_step; */ + addl 24(%ebp),%ebx +/* x_scale = x >> 16; */ + movl %ebx,%edx + sarl $16,%edx + + paddw %mm5,%mm7 + + psrlw $8,%mm7 + packuswb %mm7, %mm7 + movd %mm7,(%esi) + + addl $4, %esi + + cmpl %esi,28(%ebp) + je .out + + cmpl %edx,-24(%ebp) + je .loop + +.newx: + movl %edx,-24(%ebp) +/* + * Load the two new values into %mm1, %mm3, move old values into %mm0, %mm2 + */ + movq %mm1, %mm0 + movq %mm3, %mm2 + + shll $2, %edx + + pxor %mm4, %mm4 + + movl 16(%ebp),%edi + movl (%edi, %edx), %eax + movd (%edi, %edx), %mm5 + punpcklbw %mm4, %mm5 + shrl $24, %eax + movl $0x010101, %ecx + mull %ecx +/* + * mull destroyed %edx, need to reconstitute + */ + movl 
-24(%ebp),%edx + shll $2, %edx + + orl $0xff000000, %eax + movd %eax, %mm1 + punpcklbw %mm4, %mm1 + pmullw %mm5,%mm1 + + movl 20(%ebp),%edi + movl (%edi, %edx), %eax + movd (%edi, %edx), %mm5 + punpcklbw %mm4, %mm5 + shrl $24, %eax + movl $0x010101, %ecx + mull %ecx + orl $0xff000000, %eax + movd %eax, %mm3 + punpcklbw %mm4, %mm3 + pmullw %mm5,%mm3 + + psrlw $8,%mm1 + psrlw $8,%mm3 + + movl 8(%ebp),%edi + + jmp .loop + +.out: + movl %esi,%eax + emms + leal -40(%ebp),%esp + popl %ebx + popl %esi + popl %edi + movl %ebp,%esp + popl %ebp + ret diff --git a/gdk-pixbuf/pixops/have_mmx.S b/gdk-pixbuf/pixops/have_mmx.S new file mode 100644 index 0000000000..da222500fe --- /dev/null +++ b/gdk-pixbuf/pixops/have_mmx.S @@ -0,0 +1,42 @@ + .file "have_mmx.S" + .version "01.01" +gcc2_compiled.: +.text + .align 16 +.globl pixops_have_mmx + .type pixops_have_mmx,@function + +pixops_have_mmx: + push %ebx + +# Check if bit 21 in flags word is writeable + + pushfl + popl %eax + movl %eax,%ebx + xorl $0x00200000, %eax + pushl %eax + popfl + pushfl + popl %eax + + cmpl %eax, %ebx + + je .notfound + +# OK, we have CPUID + + movl $1, %eax + cpuid + + test $0x00800000, %edx + jz .notfound + + movl $1, %eax + jmp .out + +.notfound: + movl $0, %eax +.out: + popl %ebx + ret \ No newline at end of file diff --git a/gdk-pixbuf/pixops/pixops-internal.h b/gdk-pixbuf/pixops/pixops-internal.h new file mode 100644 index 0000000000..b6ed3c0599 --- /dev/null +++ b/gdk-pixbuf/pixops/pixops-internal.h @@ -0,0 +1,7 @@ +#ifdef USE_MMX +art_u8 *pixops_scale_line_22_33_mmx (art_u32 weights[16][8], art_u8 *p, art_u8 *q1, art_u8 *q2, int x_step, art_u8 *p_stop, int x_init); +art_u8 *pixops_composite_line_22_4a4_mmx (art_u32 weights[16][8], art_u8 *p, art_u8 *q1, art_u8 *q2, int x_step, art_u8 *p_stop, int x_init); +art_u8 *pixops_composite_line_color_22_4a4_mmx (art_u32 weights[16][8], art_u8 *p, art_u8 *q1, art_u8 *q2, int x_step, art_u8 *p_stop, int x_init, int dest_x, int check_shift, int *colors); +int 
pixops_have_mmx (void); +#endif + + diff --git a/gdk-pixbuf/pixops/pixops.c b/gdk-pixbuf/pixops/pixops.c new file mode 100644 index 0000000000..ac901064e5 --- /dev/null +++ b/gdk-pixbuf/pixops/pixops.c @@ -0,0 +1,1519 @@ +#include +#include +#include "config.h" + +#include "pixops.h" +#include "pixops-internal.h" + +#define SUBSAMPLE_BITS 4 +#define SUBSAMPLE (1 << SUBSAMPLE_BITS) +#define SUBSAMPLE_MASK ((1 << SUBSAMPLE_BITS)-1) +#define SCALE_SHIFT 16 + +typedef struct _PixopsFilter PixopsFilter; + +struct _PixopsFilter +{ + int *weights; + int n_x; + int n_y; + double x_offset; + double y_offset; +}; + +typedef art_u8 *(*PixopsLineFunc) (int *weights, int n_x, int n_y, + art_u8 *dest, int dest_x, art_u8 *dest_end, int dest_channels, int dest_has_alpha, + art_u8 **src, int src_channels, art_boolean src_has_alpha, + int x_init, int x_step, int src_width, + int check_size, art_u32 color1, art_u32 color2); + +typedef void (*PixopsPixelFunc) (art_u8 *dest, int dest_x, int dest_channels, int dest_has_alpha, + int src_has_alpha, int check_size, art_u32 color1, + art_u32 color2, + int r, int g, int b, int a); + +/* Count the trailing zero bits of check_size, i.e. log2 (check_size) when it is a power of two. + * check_size must be strictly positive: zero has no set bit, so the loop below would never + * terminate. When the precondition fails, 4 is returned as a fallback shift. + */ +static int +get_check_shift (int check_size) +{ + int check_shift = 0; + g_return_val_if_fail (check_size > 0, 4); + + while (!(check_size & 1)) + { + check_shift++; + check_size >>= 1; + } + + return check_shift; +} + +static void +pixops_scale_nearest (art_u8 *dest_buf, + int render_x0, + int render_y0, + int render_x1, + int render_y1, + int dest_rowstride, + int dest_channels, + art_boolean dest_has_alpha, + art_u8 *src_buf, + int src_width, + int src_height, + int src_rowstride, + int src_channels, + art_boolean src_has_alpha, + double scale_x, + double scale_y) +{ + int i, j; + int x; + int x_step = (1 << SCALE_SHIFT) / scale_x; + int y_step = (1 << SCALE_SHIFT) / scale_y; + +#define INNER_LOOP(SRC_CHANNELS,DEST_CHANNELS) \ + for (j=0; j < (render_x1 - render_x0); j++) \ + { \ + art_u8 *p = src + (x >> SCALE_SHIFT) * SRC_CHANNELS; \ + \ + dest[0] = p[0]; \ 
+ dest[1] = p[1]; \ + dest[2] = p[2]; \ + \ + if (DEST_CHANNELS == 4) \ + { \ + if (SRC_CHANNELS == 4) \ + dest[3] = p[3]; \ + else \ + dest[3] = 0xff; \ + } \ + \ + dest += DEST_CHANNELS; \ + x += x_step; \ + } + + for (i = 0; i < (render_y1 - render_y0); i++) + { + /* Pick the source row for destination row (i + render_y0), mirroring how x starts at render_x0 */ + art_u8 *src = src_buf + (((i + render_y0) * y_step + y_step / 2) >> SCALE_SHIFT) * src_rowstride; + art_u8 *dest = dest_buf + i * dest_rowstride; + + x = render_x0 * x_step + x_step / 2; + + if (src_channels == 3) + { + if (dest_channels == 3) + { + INNER_LOOP (3, 3); + } + else + { + INNER_LOOP (3, 4); + } + } + else if (src_channels == 4) + { + if (dest_channels == 3) + { + INNER_LOOP (4, 3); + } + else + { + for (j=0; j < (render_x1 - render_x0); j++) + { + art_u8 *p = src + (x >> SCALE_SHIFT) * 4; + + *(art_u32 *)dest = *(art_u32 *)p; + + dest += 4; + x += x_step; + } + } + } + } +#undef INNER_LOOP +} + +static void +pixops_composite_nearest (art_u8 *dest_buf, + int render_x0, + int render_y0, + int render_x1, + int render_y1, + int dest_rowstride, + int dest_channels, + art_boolean dest_has_alpha, + art_u8 *src_buf, + int src_width, + int src_height, + int src_rowstride, + int src_channels, + art_boolean src_has_alpha, + double scale_x, + double scale_y, + int overall_alpha) +{ + int i, j; + int x; + int x_step = (1 << SCALE_SHIFT) / scale_x; + int y_step = (1 << SCALE_SHIFT) / scale_y; + + for (i = 0; i < (render_y1 - render_y0); i++) + { + art_u8 *src = src_buf + (((i + render_y0) * y_step + y_step / 2) >> SCALE_SHIFT) * src_rowstride; + /* Rows start at dest_buf, as in pixops_scale_nearest and pixops_composite_color_nearest */ + art_u8 *dest = dest_buf + i * dest_rowstride; + + x = render_x0 * x_step + x_step / 2; + + for (j=0; j < (render_x1 - render_x0); j++) + { + art_u8 *p = src + (x >> SCALE_SHIFT) * src_channels; + unsigned int a0; + + if (src_has_alpha) + a0 = (p[3] * overall_alpha + 0xff) >> 8; + else + a0 = overall_alpha; + + if (dest_has_alpha) + { + unsigned int a1 = dest[3]; + unsigned int total = a0 + a1; + + if (total) + { + /* Blend the sampled pixel p, not the first pixel of the source row */ + dest[0] = (a0 * p[0] + a1 * 
dest[0]) / (total); + dest[1] = (a0 * p[1] + a1 * dest[1]) / (total); + dest[2] = (a0 * p[2] + a1 * dest[2]) / (total); + dest[3] = total - ((a0 * a1 + 0xff) >> 8); + } + else + { + dest[0] = 0; + dest[1] = 0; + dest[2] = 0; + dest[3] = 0; + } + } + else + { + dest[0] = dest[0] + ((a0 * (p[0] - dest[0]) + 0xff) >> 8); + dest[1] = dest[1] + ((a0 * (p[1] - dest[1]) + 0xff) >> 8); + dest[2] = dest[2] + ((a0 * (p[2] - dest[2]) + 0xff) >> 8); + + /* Write the padding byte in place; dest is advanced exactly once per pixel below */ + if (dest_channels == 4) + dest[3] = 0xff; + } + + dest += dest_channels; + x += x_step; + } + } +} + +static void +pixops_composite_color_nearest (art_u8 *dest_buf, + int render_x0, + int render_y0, + int render_x1, + int render_y1, + int dest_rowstride, + int dest_channels, + art_boolean dest_has_alpha, + art_u8 *src_buf, + int src_width, + int src_height, + int src_rowstride, + int src_channels, + art_boolean src_has_alpha, + double scale_x, + double scale_y, + int overall_alpha, + int check_x, + int check_y, + int check_size, + art_u32 color1, + art_u32 color2) +{ + int i, j; + int x; + int x_step = (1 << SCALE_SHIFT) / scale_x; + int y_step = (1 << SCALE_SHIFT) / scale_y; + int r1, g1, b1, r2, g2, b2; + int check_shift = get_check_shift (check_size); + + for (i = 0; i < (render_y1 - render_y0); i++) + { + art_u8 *src = src_buf + (((i + render_y0) * y_step + y_step/2) >> SCALE_SHIFT) * src_rowstride; + art_u8 *dest = dest_buf + i * dest_rowstride; + + x = render_x0 * x_step + x_step / 2; + + /* ">>" binds tighter than "&", so each mask is parenthesised before shifting */ + if (((i + check_y) >> check_shift) & 1) + { + r1 = (color2 & 0xff0000) >> 16; + g1 = (color2 & 0xff00) >> 8; + b1 = color2 & 0xff; + + r2 = (color1 & 0xff0000) >> 16; + g2 = (color1 & 0xff00) >> 8; + b2 = color1 & 0xff; + } + else + { + r1 = (color1 & 0xff0000) >> 16; + g1 = (color1 & 0xff00) >> 8; + b1 = color1 & 0xff; + + r2 = (color2 & 0xff0000) >> 16; + g2 = (color2 & 0xff00) >> 8; + b2 = color2 & 0xff; + } + + for (j=0 ; j < (render_x1 - render_x0); j++) + { + art_u8 *p = src + (x >> SCALE_SHIFT) * src_channels; + unsigned int a0; + + if 
(src_has_alpha) + a0 = (p[3] * overall_alpha + 0xff) >> 8; + else + a0 = overall_alpha; + + if ((j + check_x >> check_shift) & 1) + { + dest[0] = r2 + ((a0 * ((int)p[0] - r2) + 0xff) >> 8); + dest[1] = g2 + ((a0 * ((int)p[1] - g2) + 0xff) >> 8); + dest[2] = b2 + ((a0 * ((int)p[2] - b2) + 0xff) >> 8); + } + else + { + dest[0] = r1 + ((a0 * ((int)p[0] - r1) + 0xff) >> 8); + dest[1] = g1 + ((a0 * ((int)p[1] - g1) + 0xff) >> 8); + dest[2] = b1 + ((a0 * ((int)p[2] - b1) + 0xff) >> 8); + } + + if (dest_channels == 4) + *(dest++) = 0xff; + + dest += dest_channels; + x += x_step; + } + } +} + +static void +composite_pixel (art_u8 *dest, int dest_x, int dest_channels, int dest_has_alpha, + int src_has_alpha, int check_size, art_u32 color1, art_u32 color2, + int r, int g, int b, int a) +{ + if (dest_has_alpha) + { + unsigned int w = (((1 << 16) - a) * dest[3]) >> 8; + unsigned int total = a + w; + + if (total) + { + dest[0] = (r + w * dest[0]) / total; + dest[1] = (g + w * dest[1]) / total; + dest[2] = (b + w * dest[2]) / total; + dest[3] = (r * w) >> 16; + } + else + { + dest[0] = 0; + dest[1] = 0; + dest[2] = 0; + dest[3] = 0; + } + } + else + { + dest[0] = ((0xff0000 - a) * dest[0] + r) >> 24; + dest[1] = ((0xff0000 - a) * dest[1] + g) >> 24; + dest[2] = ((0xff0000 - a) * dest[2] + b) >> 24; + } +} + +static art_u8 * +composite_line (int *weights, int n_x, int n_y, + art_u8 *dest, int dest_x, art_u8 *dest_end, int dest_channels, int dest_has_alpha, + art_u8 **src, int src_channels, art_boolean src_has_alpha, + int x_init, int x_step, int src_width, + int check_size, art_u32 color1, art_u32 color2) +{ + int x = x_init; + int i, j; + + while (dest < dest_end) + { + int x_scaled = x >> SCALE_SHIFT; + unsigned int r = 0, g = 0, b = 0, a = 0; + int *pixel_weights; + + pixel_weights = weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS)) & SUBSAMPLE_MASK) * n_x * n_y; + + for (i=0; i> 8; + unsigned int total = a + w; + + if (total) + { + dest[0] = (r + w * dest[0]) / total; + 
dest[1] = (r + w * dest[1]) / total; + dest[2] = (r + w * dest[2]) / total; + dest[3] = (r * w) >> 16; + } + else + { + dest[0] = 0; + dest[1] = 0; + dest[2] = 0; + dest[3] = 0; + } + } + else + { + dest[0] = ((0xff0000 - a) * dest[0] + r) >> 24; + dest[1] = ((0xff0000 - a) * dest[1] + g) >> 24; + dest[2] = ((0xff0000 - a) * dest[2] + b) >> 24; + } + + dest += dest_channels; + x += x_step; + } + + return dest; +} + +static art_u8 * +composite_line_22_4a4 (int *weights, int n_x, int n_y, + art_u8 *dest, int dest_x, art_u8 *dest_end, int dest_channels, int dest_has_alpha, + art_u8 **src, int src_channels, art_boolean src_has_alpha, + int x_init, int x_step, int src_width, + int check_size, art_u32 color1, art_u32 color2) +{ + int x = x_init; + art_u8 *src0 = src[0]; + art_u8 *src1 = src[1]; + + g_return_val_if_fail (src_channels != 3, dest); + g_return_val_if_fail (src_has_alpha, dest); + + while (dest < dest_end) + { + int x_scaled = x >> SCALE_SHIFT; + unsigned int r, g, b, a, ta; + int *pixel_weights; + art_u8 *q0, *q1; + int w1, w2, w3, w4; + + q0 = src0 + x_scaled * 4; + q1 = src1 + x_scaled * 4; + + pixel_weights = (int *)((char *)weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS - 4)) & (SUBSAMPLE_MASK << 4))); + + w1 = pixel_weights[0]; + w2 = pixel_weights[1]; + w3 = pixel_weights[2]; + w4 = pixel_weights[3]; + + a = w1 * q0[3]; + r = a * q0[0]; + g = a * q0[1]; + b = a * q0[2]; + + ta = w2 * q0[7]; + r += ta * q0[4]; + g += ta * q0[5]; + b += ta * q0[6]; + a += ta; + + ta = w3 * q0[3]; + r += ta * q0[0]; + g += ta * q0[1]; + b += ta * q0[2]; + a += ta; + + ta += w4 * q1[7]; + r += ta * q1[4]; + g += ta * q1[5]; + b += ta * q1[6]; + a += ta; + + dest[0] = ((0xff0000 - a) * dest[0] + r) >> 24; + dest[1] = ((0xff0000 - a) * dest[1] + g) >> 24; + dest[2] = ((0xff0000 - a) * dest[2] + b) >> 24; + dest[3] = a >> 16; + + dest += 4; + x += x_step; + } + + return dest; +} + +#ifdef USE_MMX +static art_u8 * +composite_line_22_4a4_mmx_stub (int *weights, int n_x, int 
n_y, + art_u8 *dest, int dest_x, art_u8 *dest_end, int dest_channels, int dest_has_alpha, + art_u8 **src, int src_channels, art_boolean src_has_alpha, + int x_init, int x_step, int src_width, + int check_size, art_u32 color1, art_u32 color2) +{ + art_u32 mmx_weights[16][8]; + int j; + + for (j=0; j<16; j++) + { + mmx_weights[j][0] = 0x00010001 * (weights[4*j] >> 8); + mmx_weights[j][1] = 0x00010001 * (weights[4*j] >> 8); + mmx_weights[j][2] = 0x00010001 * (weights[4*j + 1] >> 8); + mmx_weights[j][3] = 0x00010001 * (weights[4*j + 1] >> 8); + mmx_weights[j][4] = 0x00010001 * (weights[4*j + 2] >> 8); + mmx_weights[j][5] = 0x00010001 * (weights[4*j + 2] >> 8); + mmx_weights[j][6] = 0x00010001 * (weights[4*j + 3] >> 8); + mmx_weights[j][7] = 0x00010001 * (weights[4*j + 3] >> 8); + } + + return pixops_composite_line_22_4a4_mmx (mmx_weights, dest, src[0], src[1], x_step, dest_end, x_init); +} +#endif /* USE_MMX */ + +static void +composite_pixel_color (art_u8 *dest, int dest_x, int dest_channels, int dest_has_alpha, + int src_has_alpha, int check_size, art_u32 color1, art_u32 color2, + int r, int g, int b, int a) +{ + int dest_r, dest_g, dest_b; + int check_shift = get_check_shift (check_size); + + if ((dest_x >> check_shift) & 1) + { + dest_r = color2 & 0xff0000 >> 16; + dest_g = color2 & 0xff00 >> 8; + dest_b = color2 & 0xff; + } + else + { + dest_r = color1 & 0xff0000 >> 16; + dest_g = color1 & 0xff00 >> 8; + dest_b = color1 & 0xff; + } + + dest[0] = ((0xff0000 - a) * dest_r + r) >> 24; + dest[1] = ((0xff0000 - a) * dest_g + g) >> 24; + dest[2] = ((0xff0000 - a) * dest_b + b) >> 24; + + if (dest_has_alpha) + dest[3] = 0xff; + else if (dest_channels == 4) + dest[3] = a >> 16; +} + +static art_u8 * +composite_line_color (int *weights, int n_x, int n_y, + art_u8 *dest, int dest_x, art_u8 *dest_end, int dest_channels, int dest_has_alpha, + art_u8 **src, int src_channels, art_boolean src_has_alpha, + int x_init, int x_step, int src_width, + int check_size, art_u32 color1, 
art_u32 color2) +{ + int x = x_init; + int i, j; + int check_shift = get_check_shift (check_size); + int dest_r1, dest_g1, dest_b1; + int dest_r2, dest_g2, dest_b2; + + g_return_val_if_fail (check_size != 0, dest); + + dest_r1 = color1 & 0xff0000 >> 16; + dest_g1 = color1 & 0xff00 >> 8; + dest_b1 = color1 & 0xff; + + dest_r2 = color2 & 0xff0000 >> 16; + dest_g2 = color2 & 0xff00 >> 8; + dest_b2 = color2 & 0xff; + + while (dest < dest_end) + { + int x_scaled = x >> SCALE_SHIFT; + unsigned int r = 0, g = 0, b = 0, a = 0; + int *pixel_weights; + + pixel_weights = weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS)) & SUBSAMPLE_MASK) * n_x * n_y; + + for (i=0; i> check_shift) & 1) + { + dest[0] = ((0xff0000 - a) * dest_r2 + r) >> 24; + dest[1] = ((0xff0000 - a) * dest_g2 + g) >> 24; + dest[2] = ((0xff0000 - a) * dest_b2 + b) >> 24; + } + else + { + dest[0] = ((0xff0000 - a) * dest_r1 + r) >> 24; + dest[1] = ((0xff0000 - a) * dest_g1 + g) >> 24; + dest[2] = ((0xff0000 - a) * dest_b1 + b) >> 24; + } + + if (dest_has_alpha) + dest[3] = 0xff; + else if (dest_channels == 4) + dest[3] = a >> 16; + + dest += dest_channels; + x += x_step; + dest_x++; + } + + return dest; +} + +#ifdef USE_MMX +static art_u8 * +composite_line_color_22_4a4_mmx_stub (int *weights, int n_x, int n_y, + art_u8 *dest, int dest_x, art_u8 *dest_end, int dest_channels, int dest_has_alpha, + art_u8 **src, int src_channels, art_boolean src_has_alpha, + int x_init, int x_step, int src_width, + int check_size, art_u32 color1, art_u32 color2) +{ + art_u32 mmx_weights[16][8]; + int check_shift = get_check_shift (check_size); + int colors[4]; + int j; + + for (j=0; j<16; j++) + { + mmx_weights[j][0] = 0x00010001 * (weights[4*j] >> 8); + mmx_weights[j][1] = 0x00010001 * (weights[4*j] >> 8); + mmx_weights[j][2] = 0x00010001 * (weights[4*j + 1] >> 8); + mmx_weights[j][3] = 0x00010001 * (weights[4*j + 1] >> 8); + mmx_weights[j][4] = 0x00010001 * (weights[4*j + 2] >> 8); + mmx_weights[j][5] = 0x00010001 * (weights[4*j + 
2] >> 8); + mmx_weights[j][6] = 0x00010001 * (weights[4*j + 3] >> 8); + mmx_weights[j][7] = 0x00010001 * (weights[4*j + 3] >> 8); + } + + colors[0] = (color1 & 0xff00) << 8 | (color1 & 0xff); + colors[1] = (color1 & 0xff0000) >> 16; + colors[2] = (color2 & 0xff00) << 8 | (color2 & 0xff); + colors[3] = (color2 & 0xff0000) >> 16; + + return pixops_composite_line_color_22_4a4_mmx (mmx_weights, dest, src[0], src[1], x_step, dest_end, x_init, + dest_x, check_shift, colors); +} +#endif /* USE_MMX */ + +static void +scale_pixel (art_u8 *dest, int dest_x, int dest_channels, int dest_has_alpha, + int src_has_alpha, int check_size, art_u32 color1, art_u32 color2, + int r, int g, int b, int a) +{ + if (src_has_alpha) + { + if (a) + { + dest[0] = r / a; + dest[1] = g / a; + dest[2] = b / a; + dest[3] = a >> 16; + } + else + { + dest[0] = 0; + dest[1] = 0; + dest[2] = 0; + dest[3] = 0; + } + } + else + { + dest[0] = r >> 16; + dest[1] = g >> 16; + dest[2] = b >> 16; + + if (dest_has_alpha) + dest[3] = 0xff; + } +} + +static art_u8 * +scale_line (int *weights, int n_x, int n_y, + art_u8 *dest, int dest_x, art_u8 *dest_end, int dest_channels, int dest_has_alpha, + art_u8 **src, int src_channels, art_boolean src_has_alpha, + int x_init, int x_step, int src_width, + int check_size, art_u32 color1, art_u32 color2) +{ + int x = x_init; + int i, j; + + while (dest < dest_end) + { + int x_scaled = x >> SCALE_SHIFT; + int *pixel_weights; + + pixel_weights = weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS)) & SUBSAMPLE_MASK) * n_x * n_y; + + if (src_has_alpha) + { + unsigned int r = 0, g = 0, b = 0, a = 0; + for (i=0; i> 16; + } + else + { + dest[0] = 0; + dest[1] = 0; + dest[2] = 0; + dest[3] = 0; + } + } + else + { + unsigned int r = 0, g = 0, b = 0; + for (i=0; i> 16; + dest[1] = g >> 16; + dest[2] = b >> 16; + + if (dest_has_alpha) + dest[3] = 0xff; + } + + dest += dest_channels; + + x += x_step; + } + + return dest; +} + +#ifdef USE_MMX +static art_u8 * +scale_line_22_33_mmx_stub 
(int *weights, int n_x, int n_y, + art_u8 *dest, int dest_x, art_u8 *dest_end, int dest_channels, int dest_has_alpha, + art_u8 **src, int src_channels, art_boolean src_has_alpha, + int x_init, int x_step, int src_width, + int check_size, art_u32 color1, art_u32 color2) +{ + art_u32 mmx_weights[16][8]; + int j; + + for (j=0; j<16; j++) + { + mmx_weights[j][0] = 0x00010001 * (weights[4*j] >> 8); + mmx_weights[j][1] = 0x00010001 * (weights[4*j] >> 8); + mmx_weights[j][2] = 0x00010001 * (weights[4*j + 1] >> 8); + mmx_weights[j][3] = 0x00010001 * (weights[4*j + 1] >> 8); + mmx_weights[j][4] = 0x00010001 * (weights[4*j + 2] >> 8); + mmx_weights[j][5] = 0x00010001 * (weights[4*j + 2] >> 8); + mmx_weights[j][6] = 0x00010001 * (weights[4*j + 3] >> 8); + mmx_weights[j][7] = 0x00010001 * (weights[4*j + 3] >> 8); + } + + return pixops_scale_line_22_33_mmx (mmx_weights, dest, src[0], src[1], x_step, dest_end, x_init); +} +#endif /* USE_MMX */ + +static art_u8 * +scale_line_22_33 (int *weights, int n_x, int n_y, + art_u8 *dest, art_u8 *dest_end, int dest_channels, int dest_has_alpha, + art_u8 **src, int src_channels, art_boolean src_has_alpha, + int x_init, int x_step, int src_width, + int check_size, art_u32 color1, art_u32 color2) +{ + int x = x_init; + art_u8 *src0 = src[0]; + art_u8 *src1 = src[1]; + + while (dest < dest_end) + { + unsigned int r, g, b; + int x_scaled = x >> SCALE_SHIFT; + int *pixel_weights; + art_u8 *q0, *q1; + int w1, w2, w3, w4; + + q0 = src0 + x_scaled * 3; + q1 = src1 + x_scaled * 3; + + pixel_weights = (int *)((char *)weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS - 4)) & (SUBSAMPLE_MASK << 4))); + + w1 = pixel_weights[0]; + w2 = pixel_weights[1]; + w3 = pixel_weights[2]; + w4 = pixel_weights[3]; + + r = w1 * q0[0]; + g = w1 * q0[1]; + b = w1 * q0[2]; + + r += w2 * q0[3]; + g += w2 * q0[4]; + b += w2 * q0[5]; + + r += w3 * q1[0]; + g += w3 * q1[1]; + b += w3 * q1[2]; + + r += w4 * q1[4]; + g += w4 * q1[5]; + b += w4 * q1[6]; + + dest[0] = r >> 16; + 
dest[1] = g >> 16; + dest[2] = b >> 16; + + dest += 3; + + x += x_step; + } + + return dest; +} + +static void +process_pixel (int *weights, int n_x, int n_y, + art_u8 *dest, int dest_x, int dest_channels, int dest_has_alpha, + art_u8 **src, int src_channels, art_boolean src_has_alpha, + int x_start, int src_width, + int check_size, art_u32 color1, art_u32 color2, + PixopsPixelFunc pixel_func) +{ + unsigned int r = 0, g = 0, b = 0, a = 0; + int i, j; + + for (i=0; in_y); + + int x_step = (1 << SCALE_SHIFT) / scale_x; + int y_step = (1 << SCALE_SHIFT) / scale_y; + + int dest_x; + + /* FIXME, this computation of run_end_index is not correct */ + int run_end_index = ((src_width << SCALE_SHIFT) + (filter->n_x - 1) / 2 - filter->n_x) / x_step - render_x0; + int check_shift = check_size ? get_check_shift (check_size) : 0; + + y = render_y0 * y_step + filter->y_offset * (1 << SCALE_SHIFT); + for (i = 0; i < (render_y1 - render_y0); i++) + { + int y_start = y >> SCALE_SHIFT; + int x_start; + int *run_weights = filter->weights + ((y >> (SCALE_SHIFT - SUBSAMPLE_BITS)) & SUBSAMPLE_MASK) * filter->n_x * filter->n_y * SUBSAMPLE; + art_u8 *new_outbuf; + art_u32 tcolor1, tcolor2; + + art_u8 *outbuf = dest_buf + dest_rowstride * i; + art_u8 *outbuf_end = outbuf + dest_channels * (render_x1 - render_x0); + + if (((i + check_y) >> check_shift) & 1) + { + tcolor1 = color2; + tcolor2 = color1; + } + else + { + tcolor1 = color1; + tcolor2 = color2; + } + + for (j=0; jn_y; j++) + { + if (y_start < 0) + line_bufs[j] = src_buf; + else if (y_start < src_height) + line_bufs[j] = src_buf + src_rowstride * y_start; + else + line_bufs[j] = src_buf + src_rowstride * (src_height - 1); + + y_start++; + } + + dest_x = check_x; + x = render_x0 * x_step + filter->x_offset * (1 << SCALE_SHIFT); + x_start = x >> SCALE_SHIFT; + + while (x_start < 0 && outbuf < outbuf_end) + { + process_pixel (run_weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS)) & SUBSAMPLE_MASK) * (filter->n_x * filter->n_y), 
filter->n_x, filter->n_y, + outbuf, dest_x, dest_channels, dest_has_alpha, + line_bufs, src_channels, src_has_alpha, + x >> SCALE_SHIFT, src_width, + check_size, tcolor1, tcolor2, pixel_func); + + x += x_step; + x_start = x >> SCALE_SHIFT; + dest_x++; + outbuf += dest_channels; + } + + new_outbuf = (*line_func)(run_weights, filter->n_x, filter->n_y, + outbuf, dest_x, + MIN (outbuf_end, outbuf + run_end_index * dest_channels), + dest_channels, dest_has_alpha, + line_bufs, src_channels, src_has_alpha, + x, x_step, src_width, check_size, tcolor1, tcolor2); + + dest_x += (new_outbuf - outbuf) / dest_channels; + + x = dest_x * x_step + filter->x_offset * (1 << SCALE_SHIFT); + outbuf = new_outbuf; + + while (outbuf < outbuf_end) + { + process_pixel (run_weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS)) & SUBSAMPLE_MASK) * (filter->n_x * filter->n_y), filter->n_x, filter->n_y, + outbuf, dest_x, dest_channels, dest_has_alpha, + line_bufs, src_channels, src_has_alpha, + x >> SCALE_SHIFT, src_width, + check_size, tcolor1, tcolor2, pixel_func); + + x += x_step; + dest_x++; + outbuf += dest_channels; + } + + y += y_step; + } + + g_free (line_bufs); +} + +static void +tile_make_weights (PixopsFilter *filter, double x_scale, double y_scale, double overall_alpha) +{ + int i_offset, j_offset; + + int n_x = ceil(1/x_scale + 1); + int n_y = ceil(1/y_scale + 1); + + filter->x_offset = 0; + filter->y_offset = 0; + filter->n_x = n_x; + filter->n_y = n_y; + filter->weights = g_new (int, SUBSAMPLE * SUBSAMPLE * n_x * n_y); + + for (i_offset=0; i_offsetweights + ((i_offset*SUBSAMPLE) + j_offset) * n_x * n_y; + double x = (double)j_offset / 16; + double y = (double)i_offset / 16; + int i,j; + + for (i = 0; i < n_y; i++) + { + double tw, th; + + if (i < y) + { + if (i + 1 > y) + th = MIN(i+1, y + 1/y_scale) - y; + else + th = 0; + } + else + { + if (y + 1/y_scale > i) + th = MIN(i+1, y + 1/y_scale) - i; + else + th = 0; + } + + for (j = 0; j < n_x; j++) + { + if (j < x) + { + if (j + 1 > x) 
+ tw = MIN(j+1, x + 1/x_scale) - x; + else + tw = 0; + } + else + { + if (x + 1/x_scale > j) + tw = MIN(j+1, x + 1/x_scale) - j; + else + tw = 0; + } + + *(pixel_weights + n_x * i + j) = 65536 * tw * x_scale * th * y_scale * overall_alpha; + } + } + } +} + +static void +bilinear_make_fast_weights (PixopsFilter *filter, double x_scale, double y_scale, double overall_alpha) +{ + int i_offset, j_offset; + double *x_weights, *y_weights; + int n_x, n_y; + + if (x_scale > 1.0) /* Bilinear */ + { + n_x = 2; + filter->x_offset = 0.5 * (1/x_scale - 1); + } + else /* Tile */ + { + n_x = ceil(1.0 + 1.0/x_scale); + filter->x_offset = 0.0; + } + + if (y_scale > 1.0) /* Bilinear */ + { + n_y = 2; + filter->y_offset = 0.5 * (1/y_scale - 1); + } + else /* Tile */ + { + n_y = ceil(1.0 + 1.0/y_scale); + filter->y_offset = 0.0; + } + + filter->n_y = n_y; + filter->n_x = n_x; + filter->weights = g_new (int, SUBSAMPLE * SUBSAMPLE * n_x * n_y); + + x_weights = g_new (double, n_x); + y_weights = g_new (double, n_y); + + for (i_offset=0; i_offsetweights + ((i_offset*SUBSAMPLE) + j_offset) * n_x * n_y; + double x = (double)j_offset / 16; + double y = (double)i_offset / 16; + int i,j; + + if (x_scale > 1.0) /* Bilinear */ + { + for (i = 0; i < n_x; i++) + { + x_weights[i] = ((i == 0) ? (1 - x) : x) / x_scale; + } + } + else /* Tile */ + { + for (i = 0; i < n_x; i++) + { + if (i < x) + { + if (i + 1 > x) + x_weights[i] = MIN(i+1, x + 1/x_scale) - x; + else + x_weights[i] = 0; + } + else + { + if (x + 1/x_scale > i) + x_weights[i] = MIN(i+1, x + 1/x_scale) - i; + else + x_weights[i] = 0; + } + } + } + + if (y_scale > 1.0) /* Bilinear */ + { + for (i = 0; i < n_y; i++) + { + y_weights[i] = ((i == 0) ? 
/*
 * Clip the interval [b0, b1] against [0, 1].
 *
 * Stores the clipped bounds in *lo and *hi and returns 1, or returns 0
 * (leaving the outputs untouched) when the interval does not reach into
 * the unit interval.
 */
static int
bilinear_quadrant_clip (double b0, double b1, double *lo, double *hi)
{
  if (0. < b0)
    {
      if (!(1. > b0))
        return 0;

      *lo = b0;
    }
  else
    {
      if (!(b1 > 0.))
        return 0;

      *lo = 0.;
    }

  *hi = (1. < b1) ? 1. : b1;

  return 1;
}

/*
 * bilinear_quadrant:
 *
 * Clips the rectangle [bx0, bx1] x [by0, by1] against the unit square
 * [0, 1] x [0, 1] and returns
 *
 *     0.25 * (x1^2 - x0^2) * (y1^2 - y0^2)
 *
 * for the clipped rectangle, i.e. the integral of x * y over it.  Returns 0
 * when the rectangle does not overlap the unit square.
 */
static double
bilinear_quadrant (double bx0, double bx1, double by0, double by1)
{
  double left, right, top, bottom;

  if (!bilinear_quadrant_clip (bx0, bx1, &left, &right))
    return 0;

  if (!bilinear_quadrant_clip (by0, by1, &top, &bottom))
    return 0;

  return 0.25 * (right*right - left*left) * (bottom*bottom - top*top);
}
- (y + 1 / y_scale), 0.5 + i - y); + w += bilinear_quadrant (0.5 + j - (x + 1 / x_scale), 0.5 + j - x, 1.5 + y - i, 1.5 + (y + 1 / y_scale) - i); + w += bilinear_quadrant (1.5 + x - j, 1.5 + (x + 1 / x_scale) - j, 1.5 + y - i, 1.5 + (y + 1 / y_scale) - i); + + *(pixel_weights + n_x * i + j) = 65536 * w * x_scale * y_scale * overall_alpha; + } + } +} + +void +pixops_composite_color (art_u8 *dest_buf, + int render_x0, + int render_y0, + int render_x1, + int render_y1, + int dest_rowstride, + int dest_channels, + art_boolean dest_has_alpha, + art_u8 *src_buf, + int src_width, + int src_height, + int src_rowstride, + int src_channels, + art_boolean src_has_alpha, + double scale_x, + double scale_y, + ArtFilterLevel filter_level, + int overall_alpha, + int check_x, + int check_y, + int check_size, + art_u32 color1, + art_u32 color2) +{ + PixopsFilter filter; + PixopsLineFunc line_func; + +#ifdef USE_MMX + art_boolean found_mmx = pixops_have_mmx(); +#endif + + g_return_if_fail (!(dest_channels == 3 && dest_has_alpha)); + g_return_if_fail (!(src_channels == 3 && src_has_alpha)); + + if (!src_has_alpha && overall_alpha == 255) + pixops_scale (dest_buf, render_x0, render_y0, render_x1, render_y1, + dest_rowstride, dest_channels, dest_has_alpha, + src_buf, src_width, src_height, src_rowstride, src_channels, + src_has_alpha, scale_x, scale_y, filter_level); + + switch (filter_level) + { + case ART_FILTER_NEAREST: + pixops_composite_color_nearest (dest_buf, render_x0, render_y0, render_x1, render_y1, + dest_rowstride, dest_channels, dest_has_alpha, + src_buf, src_width, src_height, src_rowstride, src_channels, src_has_alpha, + scale_x, scale_y, overall_alpha, + check_x, check_y, check_size, color1, color2); + return; + + case ART_FILTER_TILES: + tile_make_weights (&filter, scale_x, scale_y, overall_alpha / 255.); + break; + + case ART_FILTER_BILINEAR: + bilinear_make_fast_weights (&filter, scale_x, scale_y, overall_alpha / 255.); + break; + + case ART_FILTER_HYPER: + 
bilinear_make_weights (&filter, scale_x, scale_y, overall_alpha / 255.); + break; + } + +#ifdef USE_MMX + if (filter.n_x == 2 && filter.n_y == 2 && + dest_channels == 4 && src_channels == 4 && src_has_alpha && !dest_has_alpha && found_mmx) + line_func = composite_line_color_22_4a4_mmx_stub; + else +#endif + line_func = composite_line_color; + + pixops_process (dest_buf, render_x0, render_y0, render_x1, render_y1, + dest_rowstride, dest_channels, dest_has_alpha, + src_buf, src_width, src_height, src_rowstride, src_channels, + src_has_alpha, scale_x, scale_y, check_x, check_y, check_size, color1, color2, + &filter, line_func, composite_pixel_color); + + g_free (filter.weights); +} + +void +pixops_composite (art_u8 *dest_buf, + int render_x0, + int render_y0, + int render_x1, + int render_y1, + int dest_rowstride, + int dest_channels, + art_boolean dest_has_alpha, + art_u8 *src_buf, + int src_width, + int src_height, + int src_rowstride, + int src_channels, + art_boolean src_has_alpha, + double scale_x, + double scale_y, + ArtFilterLevel filter_level, + int overall_alpha) +{ + PixopsFilter filter; + PixopsLineFunc line_func; + +#ifdef USE_MMX + art_boolean found_mmx = pixops_have_mmx(); +#endif + + g_return_if_fail (!(dest_channels == 3 && dest_has_alpha)); + g_return_if_fail (!(src_channels == 3 && src_has_alpha)); + + if (!src_has_alpha && overall_alpha == 255) + pixops_scale (dest_buf, render_x0, render_y0, render_x1, render_y1, + dest_rowstride, dest_channels, dest_has_alpha, + src_buf, src_width, src_height, src_rowstride, src_channels, + src_has_alpha, scale_x, scale_y, filter_level); + + switch (filter_level) + { + case ART_FILTER_NEAREST: + pixops_composite_nearest (dest_buf, render_x0, render_y0, render_x1, render_y1, + dest_rowstride, dest_channels, dest_has_alpha, + src_buf, src_width, src_height, src_rowstride, src_channels, + src_has_alpha, scale_x, scale_y, overall_alpha); + return; + + case ART_FILTER_TILES: + tile_make_weights (&filter, scale_x, 
scale_y, overall_alpha / 255.); + break; + + case ART_FILTER_BILINEAR: + bilinear_make_fast_weights (&filter, scale_x, scale_y, overall_alpha / 255.); + break; + + case ART_FILTER_HYPER: + bilinear_make_weights (&filter, scale_x, scale_y, overall_alpha / 255.); + break; + } + + if (filter.n_x == 2 && filter.n_y == 2 && + dest_channels == 4 && src_channels == 4 && src_has_alpha && !dest_has_alpha) + { +#ifdef USE_MMX + if (found_mmx) + line_func = composite_line_22_4a4_mmx_stub; + else +#endif + line_func = composite_line_22_4a4; + } + else + line_func = composite_line; + + pixops_process (dest_buf, render_x0, render_y0, render_x1, render_y1, + dest_rowstride, dest_channels, dest_has_alpha, + src_buf, src_width, src_height, src_rowstride, src_channels, + src_has_alpha, scale_x, scale_y, 0, 0, 0, 0, 0, + &filter, line_func, composite_pixel); + + g_free (filter.weights); +} + +void +pixops_scale (art_u8 *dest_buf, + int render_x0, + int render_y0, + int render_x1, + int render_y1, + int dest_rowstride, + int dest_channels, + art_boolean dest_has_alpha, + art_u8 *src_buf, + int src_width, + int src_height, + int src_rowstride, + int src_channels, + art_boolean src_has_alpha, + double scale_x, + double scale_y, + ArtFilterLevel filter_level) +{ + PixopsFilter filter; + PixopsLineFunc line_func; + +#ifdef USE_MMX + art_boolean found_mmx = pixops_have_mmx(); +#endif + + g_return_if_fail (!(dest_channels == 3 && dest_has_alpha)); + g_return_if_fail (!(src_channels == 3 && src_has_alpha)); + g_return_if_fail (!(src_has_alpha && !dest_has_alpha)); + + switch (filter_level) + { + case ART_FILTER_NEAREST: + pixops_scale_nearest (dest_buf, render_x0, render_y0, render_x1, render_y1, + dest_rowstride, dest_channels, dest_has_alpha, + src_buf, src_width, src_height, src_rowstride, src_channels, src_has_alpha, + scale_x, scale_y); + return; + + case ART_FILTER_TILES: + tile_make_weights (&filter, scale_x, scale_y, 1.0); + break; + + case ART_FILTER_BILINEAR: + 
bilinear_make_fast_weights (&filter, scale_x, scale_y, 1.0); + break; + + case ART_FILTER_HYPER: + bilinear_make_weights (&filter, scale_x, scale_y, 1.0); + break; + } + +#ifdef USE_MMX + if (filter.n_x == 2 && filter.n_y == 2 && + found_mmx && dest_channels == 3 && src_channels == 3) + line_func = scale_line_22_33_mmx_stub; + else +#endif + line_func = scale_line; + + pixops_process (dest_buf, render_x0, render_y0, render_x1, render_y1, + dest_rowstride, dest_channels, dest_has_alpha, + src_buf, src_width, src_height, src_rowstride, src_channels, + src_has_alpha, scale_x, scale_y, 0, 0, 0, 0, 0, + &filter, line_func, scale_pixel); + + g_free (filter.weights); +} + diff --git a/gdk-pixbuf/pixops/pixops.h b/gdk-pixbuf/pixops/pixops.h new file mode 100644 index 0000000000..387b46bc17 --- /dev/null +++ b/gdk-pixbuf/pixops/pixops.h @@ -0,0 +1,80 @@ +#include +#include + +/* Scale src_buf from src_width / src_height by factors scale_x, scale_y + * and composite the portion corresponding to + * render_x, render_y, render_width, render_height in the new + * coordinate system into dest_buf starting at 0, 0 + */ +void pixops_composite (art_u8 *dest_buf, + int render_x0, + int render_y0, + int render_x1, + int render_y1, + int dest_rowstride, + int dest_channels, + int dest_has_alpha, + art_u8 *src_buf, + int src_width, + int src_height, + int src_rowstride, + int src_channels, + int src_has_alpha, + double scale_x, + double scale_y, + ArtFilterLevel filter_level, + int overall_alpha); + +/* Scale src_buf from src_width / src_height by factors scale_x, scale_y + * and composite the portion corresponding to + * render_x, render_y, render_width, render_height in the new + * coordinate system against a checkboard with checks of size check_size + * of the colors color1 and color2 into dest_buf starting at 0, 0 + */ +void pixops_composite_color (art_u8 *dest_buf, + int render_x0, + int render_y0, + int render_x1, + int render_y1, + int dest_rowstride, + int dest_channels, + int 
dest_has_alpha, + art_u8 *src_buf, + int src_width, + int src_height, + int src_rowstride, + int src_channels, + int src_has_alpha, + double scale_x, + double scale_y, + ArtFilterLevel filter_level, + int overall_alpha, + int check_x, + int check_y, + int check_size, + art_u32 color1, + art_u32 color2); + +/* Scale src_buf from src_width / src_height by factors scale_x, scale_y + * and composite the portion corresponding to + * render_x, render_y, render_width, render_height in the new + * coordinate system into dest_buf starting at 0, 0 + */ +void pixops_scale (art_u8 *dest_buf, + int render_x0, + int render_y0, + int render_x1, + int render_y1, + int dest_rowstride, + int dest_channels, + int dest_has_alpha, + art_u8 *src_buf, + int src_width, + int src_height, + int src_rowstride, + int src_channels, + int src_has_alpha, + double scale_x, + double scale_y, + ArtFilterLevel filter_level); + diff --git a/gdk-pixbuf/pixops/scale_line_22_33_mmx.S b/gdk-pixbuf/pixops/scale_line_22_33_mmx.S new file mode 100644 index 0000000000..e253fc1889 --- /dev/null +++ b/gdk-pixbuf/pixops/scale_line_22_33_mmx.S @@ -0,0 +1,152 @@ + .file "scale_line_22_33_mmx.S" + .version "01.01" +gcc2_compiled.: +.text + .align 16 +.globl pixops_scale_line_22_33_mmx + .type pixops_scale_line_22_33_mmx,@function +/* + * Arguments + * + * weights: 8(%ebp) + * p: 12(%ebp) %esi + * q1: 16(%ebp) + * q2: 20(%ebp) + * xstep: 24(%ebp) + * p_end: 28(%ebp) + * xinit: 32(%ebp) + * +*/ +pixops_scale_line_22_33_mmx: +/* + * Function call entry + */ + pushl %ebp + movl %esp,%ebp + subl $28,%esp + pushl %edi + pushl %esi + pushl %ebx +/* Locals: + * int x %ebx + * int x_scaled -24(%ebp) + */ + +/* + * Setup + */ +/* Initialize variables */ + movl 32(%ebp),%ebx + movl 32(%ebp),%edx + sarl $16,%edx + movl 12(%ebp),%esi + + cmpl %esi,28(%ebp) + je .out + +/* Load initial values into %mm1, %mm3 */ + leal (%edx,%edx,2),%edx # Multiply by 3 + + movl 16(%ebp),%edi + pxor %mm4, %mm4 + movzbl 2(%edi,%edx),%ecx + shll 
$16,%ecx + movzwl (%edi,%edx),%eax + orl %eax,%ecx + movd %ecx, %mm1 + punpcklbw %mm4, %mm1 + + movl 20(%ebp),%edi + movzbl 2(%edi,%edx),%ecx + shll $16,%ecx + movzwl (%edi,%edx),%eax + orl %eax,%ecx + movd %ecx, %mm3 + punpcklbw %mm4, %mm3 + + addl $65536,%ebx + movl %ebx,%edx + sarl $16,%edx + + jmp .newx + .p2align 4,,7 +.loop: +/* int x_index = (x & 0xf000) >> 12 */ + movl %ebx,%eax + andl $0xf000,%eax + shrl $7,%eax + + movq (%edi,%eax),%mm4 + pmullw %mm0,%mm4 + movq 8(%edi,%eax),%mm5 + pmullw %mm1,%mm5 + movq 16(%edi,%eax),%mm6 + movq 24(%edi,%eax),%mm7 + pmullw %mm2,%mm6 + pmullw %mm3,%mm7 + paddw %mm4, %mm5 + paddw %mm6, %mm7 + paddw %mm5, %mm7 + + psrlw $8, %mm7 + packuswb %mm7, %mm7 + movd %mm7, %eax + + movb %al, (%esi) + shrl $8, %eax + movw %ax, 1(%esi) + addl $3, %esi + + cmpl %esi,28(%ebp) + je .out + +/* x += x_step; */ + addl 24(%ebp),%ebx +/* x_scale = x >> 16; */ + movl %ebx,%edx + sarl $16,%edx + + cmpl %edx,-24(%ebp) + je .loop + +.newx: + movl %edx,-24(%ebp) +/* + * Load the two new values into %mm1, %mm3, move old values into %mm0, %mm2 + */ + movq %mm1, %mm0 + movq %mm3, %mm2 + + leal (%edx,%edx,2),%edx # Multiply by 3 + + movl 16(%ebp),%edi + pxor %mm4, %mm4 + movzbl 2(%edi,%edx),%ecx + shll $16,%ecx + movzwl (%edi,%edx),%eax + orl %eax,%ecx + movd %ecx, %mm1 + punpcklbw %mm4, %mm1 + + movl 20(%ebp),%edi + movzbl 2(%edi,%edx),%ecx + shll $16,%ecx + movzwl (%edi,%edx),%eax + orl %eax,%ecx + movd %ecx, %mm3 + punpcklbw %mm4, %mm3 + + movl 8(%ebp),%edi + + jmp .loop + +.out: + movl %esi,%eax + emms + leal -40(%ebp),%esp + popl %ebx + popl %esi + popl %edi + movl %ebp,%esp + popl %ebp + ret diff --git a/gdk-pixbuf/pixops/timescale.c b/gdk-pixbuf/pixops/timescale.c new file mode 100644 index 0000000000..1abefb84e9 --- /dev/null +++ b/gdk-pixbuf/pixops/timescale.c @@ -0,0 +1,227 @@ +#include +#include +#include +#include +#include +#include "pixops.h" + +struct timeval start_time; + +void start_timing (void) +{ + gettimeofday (&start_time, NULL); 
/*
 * init_array:
 *
 * Fill a [3][3][4] table of timing results with -1 so that entries which
 * are never measured can be told apart from real timings.
 */
void
init_array (double times[3][3][4])
{
  int i, j, k;

  for (i = 0; i < 3; i++)
    for (j = 0; j < 3; j++)
      for (k = 0; k < 4; k++)   /* bound the innermost loop on k, not j */
        times[i][j][k] = -1;
}
[src_width src_height dest_width dest_height]\n"); + exit(1); + } + + + printf ("Scaling from (%d, %d) to (%d, %d)\n\n", src_width, src_height, dest_width, dest_height); + + for (src_index = 0; src_index < 3; src_index++) + for (dest_index = 0; dest_index < 3; dest_index++) + { + int src_channels = (src_index == 0) ? 3 : 4; + int src_has_alpha = (src_index == 2); + int dest_channels = (dest_index == 0) ? 3 : 4; + int dest_has_alpha = (dest_index == 2); + + int src_rowstride = (src_channels*src_width + 3) & ~3; + int dest_rowstride = (dest_channels *dest_width + 3) & ~3; + + int filter_level; + + src_buf = malloc(src_rowstride * src_height); + memset (src_buf, 0x80, src_rowstride * src_height); + + dest_buf = malloc(dest_rowstride * dest_height); + memset (dest_buf, 0x80, dest_rowstride * dest_height); + + for (filter_level = ART_FILTER_NEAREST ; filter_level <= ART_FILTER_HYPER; filter_level++) + { + printf ("src_channels = %d (%s); dest_channels = %d (%s); filter_level=", + src_channels, src_has_alpha ? "alpha" : "no alpha", + dest_channels, dest_has_alpha ? 
"alpha" : "no alpha"); + switch (filter_level) + { + case ART_FILTER_NEAREST: + printf ("ART_FILTER_NEAREST\n"); + break; + case ART_FILTER_TILES: + printf ("ART_FILTER_TILES\n"); + break; + case ART_FILTER_BILINEAR: + printf ("ART_FILTER_BILINEAR\n"); + break; + case ART_FILTER_HYPER: + printf ("ART_FILTER_HYPER\n"); + break; + } + + printf("\t\t\titers\ttotal\t\tmsecs/iter\tMpixels/sec\t\n"); + + + if (!(src_has_alpha && !dest_has_alpha)) + { + start_timing (); + for (i = 0; i < ITERS; i++) + { + pixops_scale (dest_buf, 0, 0, dest_width, dest_height, dest_rowstride, dest_channels, dest_has_alpha, + src_buf, src_width, src_height, src_rowstride, src_channels, src_has_alpha, + (double)dest_width / src_width, (double)dest_height / src_height, + filter_level); + } + scale_times[src_index][dest_index][filter_level] = + stop_timing (" scale\t\t", ITERS, dest_height * dest_width); + } + + start_timing (); + for (i = 0; i < ITERS; i++) + { + pixops_composite (dest_buf, 0, 0, dest_width, dest_height, dest_rowstride, dest_channels, dest_has_alpha, + src_buf, src_width, src_height, src_rowstride, src_channels, src_has_alpha, + (double)dest_width / src_width, (double)dest_height / src_height, + filter_level, 255); + } + composite_times[src_index][dest_index][filter_level] = + stop_timing (" composite\t\t", ITERS, dest_height * dest_width); + + start_timing (); + for (i = 0; i < ITERS; i++) + { + pixops_composite_color (dest_buf, 0, 0, dest_width, dest_height, dest_rowstride, dest_channels, dest_has_alpha, + src_buf, src_width, src_height, src_rowstride, src_channels, src_has_alpha, + (double)dest_width / src_width, (double)dest_height / src_height, + filter_level, 255, 0, 0, 16, 0xaaaaaa, 0x555555); + } + composite_color_times[src_index][dest_index][filter_level] = + stop_timing (" composite color\t", ITERS, dest_height * dest_width); + + printf ("\n"); + } + printf ("\n"); + + free (src_buf); + free (dest_buf); + } + + printf ("SCALE\n=====\n\n"); + dump_array 
(scale_times); + + printf ("COMPOSITE\n=========\n\n"); + dump_array (composite_times); + + printf ("COMPOSITE_COLOR\n===============\n\n"); + dump_array (composite_color_times); +}