gtk/gdk-pixbuf/pixops/composite_line_22_4a4_mmx.S

219 lines
3.3 KiB
ArmAsm
Raw Normal View History

.file "composite_line_22_4a4_mmx.S"
.version "01.01"
gcc2_compiled.:
.text
.align 16
#ifndef __MINGW32__
.globl pixops_composite_line_22_4a4_mmx
.type pixops_composite_line_22_4a4_mmx,@function
pixops_composite_line_22_4a4_mmx:
#else
.globl _pixops_composite_line_22_4a4_mmx
_pixops_composite_line_22_4a4_mmx:
#endif
/*
* Arguments
*
* weights: 8(%ebp)
* p: 12(%ebp) %esi
* q1: 16(%ebp)
* q2: 20(%ebp)
* xstep: 24(%ebp)
* p_end: 28(%ebp)
* xinit: 32(%ebp)
*
*/
/*
* Function call entry
*/
pushl %ebp
movl %esp,%ebp
subl $28,%esp
pushl %edi
pushl %esi
pushl %ebx
/* Locals:
* int x %ebx
* int x_scaled -24(%ebp)
*/
/*
* Setup
*/
/* Initialize variables */
movl 32(%ebp),%ebx
movl 32(%ebp),%edx
sarl $16,%edx
movl 12(%ebp),%esi
movl %edx,-24(%ebp)
cmpl %esi,28(%ebp)
je .out
/* Load initial values into %mm1, %mm3 */
shll $2, %edx
pxor %mm4, %mm4
movl 16(%ebp),%edi
movl (%edi, %edx), %eax
movd (%edi, %edx), %mm5
punpcklbw %mm4, %mm5
shrl $24, %eax
movl $0x010101, %ecx
mull %ecx
orl $0xff000000, %eax
movd %eax, %mm1
punpcklbw %mm4, %mm1
pmullw %mm5,%mm1
movl -24(%ebp),%edx
shll $2, %edx
movl 20(%ebp),%edi
movl (%edi, %edx), %eax
movd (%edi, %edx), %mm5
punpcklbw %mm4, %mm5
shrl $24, %eax
movl $0x010101, %ecx
mull %ecx
orl $0xff000000, %eax
movd %eax, %mm3
punpcklbw %mm4, %mm3
pmullw %mm5,%mm3
psrlw $8,%mm1
psrlw $8,%mm3
addl $65536,%ebx
movl %ebx,%edx
sarl $16,%edx
jmp .newx
.p2align 4,,7
.loop:
/* int x_index = (x & 0xf000) >> 12 */
movl %ebx,%eax
andl $0xf000,%eax
shrl $7,%eax
movq (%edi,%eax),%mm4
pmullw %mm0,%mm4
movq 8(%edi,%eax),%mm5
pmullw %mm1,%mm5
movq 16(%edi,%eax),%mm6
movq 24(%edi,%eax),%mm7
pmullw %mm2,%mm6
pmullw %mm3,%mm7
paddw %mm4, %mm5
paddw %mm6, %mm7
paddw %mm5, %mm7
movl $0xffff,%ecx
movd %ecx,%mm4
psllq $48,%mm4
movq %mm4,%mm6
psubw %mm7,%mm4
pand %mm6,%mm4
movq %mm4,%mm5
psrlq $16,%mm4
por %mm4,%mm5
psrlq $32,%mm5
por %mm4,%mm5
psrlw $8,%mm5
movd (%esi),%mm7
pxor %mm4,%mm4
punpcklbw %mm4, %mm7
pmullw %mm7,%mm5
/* x += x_step; */
addl 24(%ebp),%ebx
/* x_scale = x >> 16; */
movl %ebx,%edx
sarl $16,%edx
paddw %mm5,%mm6
psrlw $8,%mm6
packuswb %mm6, %mm6
movd %mm6,(%esi)
addl $4, %esi
cmpl %esi,28(%ebp)
je .out
cmpl %edx,-24(%ebp)
je .loop
.newx:
movl %edx,-24(%ebp)
/*
* Load the two new values into %mm1, %mm3, move old values into %mm0, %mm2
*/
movq %mm1, %mm0
movq %mm3, %mm2
shll $2, %edx
# %mm4 will always be already clear here
# pxor %mm4, %mm4
movl 16(%ebp),%edi
movl (%edi, %edx), %eax
movd (%edi, %edx), %mm5
punpcklbw %mm4, %mm5
shrl $24, %eax
movl $0x010101, %ecx
mull %ecx
/*
* mull destroyed %edx, need to reconstitute
*/
movl -24(%ebp),%edx
shll $2, %edx
orl $0xff000000, %eax
movd %eax, %mm1
punpcklbw %mm4, %mm1
pmullw %mm5,%mm1
movl 20(%ebp),%edi
movl (%edi, %edx), %eax
movd (%edi, %edx), %mm5
punpcklbw %mm4, %mm5
shrl $24, %eax
movl $0x010101, %ecx
mull %ecx
orl $0xff000000, %eax
movd %eax, %mm3
punpcklbw %mm4, %mm3
pmullw %mm5,%mm3
psrlw $8,%mm1
psrlw $8,%mm3
movl 8(%ebp),%edi
jmp .loop
.out:
movl %esi,%eax
emms
leal -40(%ebp),%esp
popl %ebx
popl %esi
popl %edi
movl %ebp,%esp
popl %ebp
ret