gtk2/gdk-pixbuf/pixops/composite_line_22_4a4_mmx.S
Tor Lillqvist 773aacf9b6 Win32 build setup:
2000-07-23  Tor Lillqvist  <tml@iki.fi>

	Win32 build setup:

	* makefile.mingw.in
	* pixops/makefile.mingw.in
	* pixbuf_*.def: New files.

	* Makefile.am
	* pixops/Makefile.am: Add them. Add rule to generate makefile.mingw.

	* gdk-pixbuf-io.c (gtk_win32_get_installation_directory):
	New function, to get the GTK+ installation directory from
	the Windows Registry, where the installation program
	for whatever software includes GTK+ has stored it.

	Used to avoid hardcoding the directory in which to look for
	loaders. This function is needed by gtk, too, so it should
	really be just in one place. Maybe a small static library
	one level up from here?

	* gdk-pixbuf-animation.c (gdk_pixbuf_animation_new_from_file)
	* gdk-pixbuf-io.c (gdk_pixbuf_new_from_file):
	Open file in binary mode. This *is* standard C. (No-op on Unix,
	of course.)

	* io-jpeg.c: If we don't HAVE_SIGSETJMP, use normal setjmp().

	* io-tiff.c: Use g_get_tmp_dir(). If we don't HAVE_MKSTEMP, use
	mktemp() and open().
	(gdk_pixbuf__tiff_image_stop_load): g_free() also the tempname.

	* pixops/*.S: The Gas from mingw32 doesn't like the .type
	pseudo-op. Conditionalise on __MINGW32__, but probably
	should conditionalise on Gas version instead? Or could we
	do without .type on all systems?

	* pixops/timescale.c: Use g_get_current_time()
	instead of gettimeofday().
2000-07-22 23:50:19 +00:00

219 lines
3.3 KiB
ArmAsm

.file "composite_line_22_4a4_mmx.S"
.version "01.01"
gcc2_compiled.:
.text
.align 16
#ifndef __MINGW32__
.globl pixops_composite_line_22_4a4_mmx
	.type pixops_composite_line_22_4a4_mmx,@function
pixops_composite_line_22_4a4_mmx:
#else
.globl _pixops_composite_line_22_4a4_mmx
_pixops_composite_line_22_4a4_mmx:
#endif
/*
 * pixops_composite_line_22_4a4_mmx
 *
 * Composites one line of 4-byte pixels onto the destination p, using a
 * 2x2 weighted filter over two source rows (q1, q2) and MMX arithmetic.
 * Syntax: GNU as, AT&T operand order.  Target: 32-bit x86 with MMX.
 *
 * Arguments (all passed on the stack)
 *
 * weights:	 8(%ebp)	table of 16-bit weights; 32 bytes (four
 *				quadwords) per x_index, x_index = 0..15
 * p:		12(%ebp)	%esi	destination, 4 bytes per pixel
 * q1:		16(%ebp)	first source row, 4 bytes per pixel
 * q2:		20(%ebp)	second source row, 4 bytes per pixel
 * xstep:	24(%ebp)	added to x after every destination pixel
 * p_end:	28(%ebp)	loop stops when p == p_end (equality test
 *				only, so p_end must be reachable from p in
 *				4-byte steps)
 * xinit:	32(%ebp)	initial x; x >> 16 is the source column
 *
 * Returns:	%eax = p after the last pixel written (== p_end), or the
 *		unmodified p when p == p_end on entry.
 * Preserves:	%ebx, %esi, %edi, %ebp (saved and restored here).
 * Clobbers:	%eax, %ecx, %edx, flags, %mm0-%mm7; emms is executed
 *		before returning.
 *
 * Per pixel, the byte at bits 24..31 of each source dword is used as
 * alpha.  Source pixels are premultiplied by their alpha when loaded;
 * the destination is scaled by (255 - weighted alpha) and the top byte
 * of every stored pixel is forced to 0xff.
 */
/*
 * Function call entry
 */
	pushl	%ebp
	movl	%esp,%ebp
	subl	$28,%esp		/* 28 bytes of locals + 3 pushes = 40, see .out */
	pushl	%edi
	pushl	%esi
	pushl	%ebx
/* Locals:
 * int x		%ebx
 * int x_scaled		-24(%ebp)
 */
/*
 * Setup
 */
/* Initialize variables */
	movl	32(%ebp),%ebx		/* x = xinit */
	movl	32(%ebp),%edx
	sarl	$16,%edx		/* x_scaled = xinit >> 16 */
	movl	12(%ebp),%esi		/* %esi = p */
	movl	%edx,-24(%ebp)
	cmpl	%esi,28(%ebp)		/* nothing to do when p == p_end */
	je	.out
/* Load initial values into %mm1, %mm3 */
	shll	$2, %edx		/* byte offset of column x_scaled */
	pxor	%mm4, %mm4		/* %mm4 = 0, used for byte->word unpacking */
	movl	16(%ebp),%edi		/* %edi = q1 */
	movl	(%edi, %edx), %eax
	movd	(%edi, %edx), %mm5
	punpcklbw %mm4, %mm5		/* %mm5 = q1 pixel as four words */
	shrl	$24, %eax		/* %eax = alpha byte */
	movl	$0x010101, %ecx
	mull	%ecx			/* replicate alpha into bytes 0..2 (clobbers %edx) */
	orl	$0xff000000, %eax	/* alpha lane multiplier = 0xff */
	movd	%eax, %mm1
	punpcklbw %mm4, %mm1
	pmullw	%mm5,%mm1		/* %mm1 = pixel * alpha, per lane */
	movl	-24(%ebp),%edx		/* mull destroyed %edx, reload x_scaled */
	shll	$2, %edx
	movl	20(%ebp),%edi		/* %edi = q2 */
	movl	(%edi, %edx), %eax
	movd	(%edi, %edx), %mm5
	punpcklbw %mm4, %mm5
	shrl	$24, %eax
	movl	$0x010101, %ecx
	mull	%ecx
	orl	$0xff000000, %eax
	movd	%eax, %mm3
	punpcklbw %mm4, %mm3
	pmullw	%mm5,%mm3		/* %mm3 = q2 pixel * alpha, per lane */
	psrlw	$8,%mm1			/* keep the high byte of each product */
	psrlw	$8,%mm3
	addl	$65536,%ebx		/* x += 1.0 in 16.16 fixed point */
	movl	%ebx,%edx
	sarl	$16,%edx
	jmp	.newx			/* shift into %mm0/%mm2 and load the next column */
	.p2align 4,,7
.loop:
/*
 * int x_index = (x & 0xf000) >> 12
 * Each x_index owns 32 bytes of weights, so the byte offset is
 * x_index << 5, i.e. (x & 0xf000) >> 7.
 */
	movl	%ebx,%eax
	andl	$0xf000,%eax
	shrl	$7,%eax
/* %mm7 = sum over the four source pixels of weight * premultiplied pixel */
	movq	(%edi,%eax),%mm4
	pmullw	%mm0,%mm4
	movq	8(%edi,%eax),%mm5
	pmullw	%mm1,%mm5
	movq	16(%edi,%eax),%mm6
	movq	24(%edi,%eax),%mm7
	pmullw	%mm2,%mm6
	pmullw	%mm3,%mm7
	paddw	%mm4, %mm5
	paddw	%mm6, %mm7
	paddw	%mm5, %mm7
/* %mm4 = %mm6 = 0xffff in the alpha lane (lane 3), zero elsewhere */
	movl	$0xffff,%ecx
	movd	%ecx,%mm4
	psllq	$48,%mm4
	movq	%mm4,%mm6
	psubw	%mm7,%mm4
	pand	%mm6,%mm4		/* %mm4 = (0xffff - weighted alpha), lane 3 only */
/*
 * Keep the weighted source colour before %mm7 is reused for the
 * destination pixel below.  The colour lanes of %mm6 are zero and its
 * alpha lane is 0xffff, so the OR yields: colour lanes = source sum,
 * alpha lane = 0xffff.  Without this the source colour was never added
 * to the output and only the attenuated destination was stored.
 */
	por	%mm7,%mm6
/* Broadcast (0xffff - alpha) from lane 3 into lanes 0..2; lane 3 becomes 0 */
	movq	%mm4,%mm5
	psrlq	$16,%mm4
	por	%mm4,%mm5
	psrlq	$32,%mm5
	por	%mm4,%mm5
	psrlw	$8,%mm5			/* %mm5 = (255 - alpha) in lanes 0..2 */
	movd	(%esi),%mm7		/* %mm7 = destination pixel */
	pxor	%mm4,%mm4
	punpcklbw %mm4, %mm7
	pmullw	%mm7,%mm5		/* %mm5 = dest * (255 - alpha) */
/* x += x_step; */
	addl	24(%ebp),%ebx
/* x_scaled = x >> 16; */
	movl	%ebx,%edx
	sarl	$16,%edx
/*
 * result = source + dest * (255 - alpha).
 * NOTE(review): no 16-bit overflow as long as the four weights of one
 * lane sum to at most 256 -- confirm against the code that builds the
 * weight table.
 */
	paddw	%mm5,%mm6
	psrlw	$8,%mm6
	packuswb %mm6, %mm6
	movd	%mm6,(%esi)		/* store pixel; top byte is 0xff */
	addl	$4, %esi
	cmpl	%esi,28(%ebp)
	je	.out
	cmpl	%edx,-24(%ebp)		/* same source column: reuse %mm0..%mm3 */
	je	.loop
.newx:
	movl	%edx,-24(%ebp)
/*
 * Load the two new values into %mm1, %mm3, move old values into %mm0, %mm2
 */
	movq	%mm1, %mm0
	movq	%mm3, %mm2
	shll	$2, %edx
/*
 * %mm4 will always be already clear here (cleared in the setup code and
 * again in the loop before the destination unpack), so no
 * "pxor %mm4, %mm4" is needed.
 */
	movl	16(%ebp),%edi		/* %edi = q1 */
	movl	(%edi, %edx), %eax
	movd	(%edi, %edx), %mm5
	punpcklbw %mm4, %mm5
	shrl	$24, %eax
	movl	$0x010101, %ecx		/* %ecx was overwritten with 0xffff in the loop */
	mull	%ecx
/*
 * mull destroyed %edx, need to reconstitute
 */
	movl	-24(%ebp),%edx
	shll	$2, %edx
	orl	$0xff000000, %eax
	movd	%eax, %mm1
	punpcklbw %mm4, %mm1
	pmullw	%mm5,%mm1
	movl	20(%ebp),%edi		/* %edi = q2 */
	movl	(%edi, %edx), %eax
	movd	(%edi, %edx), %mm5
	punpcklbw %mm4, %mm5
	shrl	$24, %eax
	movl	$0x010101, %ecx
	mull	%ecx
	orl	$0xff000000, %eax
	movd	%eax, %mm3
	punpcklbw %mm4, %mm3
	pmullw	%mm5,%mm3
	psrlw	$8,%mm1
	psrlw	$8,%mm3
	movl	8(%ebp),%edi		/* %edi = weights, as the loop body expects */
	jmp	.loop
.out:
	movl	%esi,%eax		/* return the advanced destination pointer */
	emms				/* leave MMX state so x87 code can run again */
	leal	-40(%ebp),%esp		/* point %esp at the three saved registers */
	popl	%ebx
	popl	%esi
	popl	%edi
	movl	%ebp,%esp
	popl	%ebp
	ret