2005-07-12 04:05:26 +00:00
|
|
|
/*
|
2005-07-12 15:58:57 +00:00
|
|
|
* Copyright (C) 2000 Red Hat, Inc
|
2005-07-12 04:05:26 +00:00
|
|
|
*
|
|
|
|
* This library is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
|
|
|
* version 2 of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This library is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* Lesser General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
|
|
* License along with this library; if not, write to the
|
|
|
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
|
|
* Boston, MA 02111-1307, USA.
|
|
|
|
*/
|
2000-01-05 21:33:58 +00:00
|
|
|
.file	"scale_line_22_33_mmx.S"
	.version	"01.01"
gcc2_compiled.:
.text
	.align 16

#if !defined(__MINGW32__) && !defined(__CYGWIN__) && !defined(__INTERIX)

/* Magic indicating no need for an executable stack */
#if !defined __powerpc64__ && !defined __ia64__
.section .note.GNU-stack; .previous
#endif

.globl _pixops_scale_line_22_33_mmx
	.type	_pixops_scale_line_22_33_mmx,@function
_pixops_scale_line_22_33_mmx:

#else

.globl __pixops_scale_line_22_33_mmx
__pixops_scale_line_22_33_mmx:

#endif

/*
 * pixops_scale_line_22_33_mmx
 *
 * Produces one line of 3-byte pixels at p by blending, for every output
 * pixel, two horizontally adjacent 3-byte pixels from each of the source
 * lines q1 and q2 (four source pixels in total) with a set of four
 * weights selected by the fractional part of the running x position.
 *
 * Syntax: AT&T.  Target: 32-bit x86 with MMX.  All arguments are read
 * from the stack relative to %ebp:
 *
 *  weights:     8(%ebp)     base of the weight table (%edi inside the loop)
 *  p:          12(%ebp)     destination pointer       (%esi)
 *  q1:         16(%ebp)     first source line
 *  q2:         20(%ebp)     second source line
 *  xstep:      24(%ebp)     added to x after every output pixel
 *  p_end:      28(%ebp)     destination end pointer
 *  xinit:      32(%ebp)     initial x
 *
 * x is treated as fixed point: x >> 16 is the source pixel index and
 * bits 12..15 select one of 16 weight sets.
 *
 * Returns (in %eax) the value of the destination pointer on exit.
 * Saves and restores %ebp, %edi, %esi and %ebx; overwrites %eax, %ecx,
 * %edx and %mm0-%mm7, and executes emms before returning.
 */

/*
 * Function call entry
 */
	pushl	%ebp
	movl	%esp,%ebp
	subl	$28,%esp
	pushl	%edi
	pushl	%esi
	pushl	%ebx
/* Locals:
 *  int x                      %ebx
 *  int x_scaled             -24(%ebp)
 *
 * Frame layout: 28 bytes of locals plus the three saved registers put
 * the saved %ebx at -40(%ebp); the epilogue relies on that offset.
 */

/*
 * Setup
 */
/* Initialize variables */
	movl	32(%ebp),%ebx		/* x = xinit */
	movl	32(%ebp),%edx
	sarl	$16,%edx		/* %edx = xinit >> 16 */
	movl	12(%ebp),%esi		/* %esi = p */

	/* Nothing to do if p >= p_end (unsigned compare) */
	cmpl	28(%ebp),%esi
	jnb	.Lout

/* For the body of this loop, %mm0, %mm1, %mm2, %mm3 hold the 4 adjoining
 * points we are interpolating between, as:
 *
 *  000000BB00GG00RR
 *
 * %mm0/%mm1 are the older/newer pixel taken from q1, %mm2/%mm3 the
 * older/newer pixel taken from q2.
 */

/* Load initial values into %mm1, %mm3 */
	leal	(%edx,%edx,2),%edx	/* Multiply by 3: byte offset of the pixel */

	movl	16(%ebp),%edi		/* q1 */
	pxor	%mm4, %mm4		/* %mm4 = 0, used to widen bytes to words */
	movzbl	2(%edi,%edx),%ecx	/* third byte of the pixel ...        */
	shll	$16,%ecx		/* ... moved to bits 16..23           */
	movzwl	(%edi,%edx),%eax	/* first two bytes of the pixel       */
	orl	%eax,%ecx		/* %ecx = the 3 pixel bytes, top byte 0 */
	movd	%ecx, %mm1
	punpcklbw %mm4, %mm1		/* one byte per 16-bit lane */

	movl	20(%ebp),%edi		/* q2, same sequence */
	movzbl	2(%edi,%edx),%ecx
	shll	$16,%ecx
	movzwl	(%edi,%edx),%eax
	orl	%eax,%ecx
	movd	%ecx, %mm3
	punpcklbw %mm4, %mm3

	/* Advance x by one whole source pixel (1 << 16) and enter through
	 * .Lnewx, which shifts the pixels just loaded into %mm0/%mm2 and
	 * loads their right-hand neighbours into %mm1/%mm3.
	 */
	addl	$65536,%ebx
	movl	%ebx,%edx
	sarl	$16,%edx

	jmp	.Lnewx
	.p2align 4,,7
.Lloop:
/* short *pixel_weights = weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS)) & SUBSAMPLE_MASK) * n_x * n_y
 *                                           16             4                  0xf            2     2
 *
 * (x & 0xf000) >> 7 equals ((x >> 12) & 0xf) * 32: the byte offset of a
 * 32-byte weight set, read below as four quadwords.
 */
	movl	%ebx,%eax
	andl	$0xf000,%eax
	shrl	$7,%eax

/* At this point, %edi holds weights. Load the 4 weights into %mm4,%mm5,%mm6,%mm7, multiply and
 * accumulate.
 */
	movq	(%edi,%eax),%mm4
	pmullw	%mm0,%mm4
	movq	8(%edi,%eax),%mm5
	pmullw	%mm1,%mm5
	movq	16(%edi,%eax),%mm6
	movq	24(%edi,%eax),%mm7
	pmullw	%mm2,%mm6
	pmullw	%mm3,%mm7
	paddw	%mm4, %mm5
	paddw	%mm6, %mm7
	paddw	%mm5, %mm7

/* %mm7 holds the accumulated sum. Compute (C + 0x80) / 256
 *
 * %mm4 is cleared here and is not written again before .Lnewx, which
 * depends on it still being zero.
 */
	pxor	%mm4, %mm4
	movl	$8421504, %eax		/* 0x00808080 */
	movd	%eax, %mm6
	punpcklbw %mm4, %mm6		/* 0x0080 in each of the three low lanes */
	paddw	%mm6, %mm7
	psrlw	$8, %mm7

/* Pack into %eax and store result
 */
	packuswb %mm7, %mm7
	movd	%mm7, %eax

	movb	%al, (%esi)		/* first byte */
	shrl	$8, %eax
	movw	%ax, 1(%esi)		/* second and third byte */
	addl	$3, %esi

	/* Leave once p has reached p_end.  This is the same unsigned >=
	 * test as the entry guard, so the loop also stops if p steps past
	 * p_end rather than only on exact equality.
	 */
	cmpl	28(%ebp),%esi
	jnb	.Lout

/* x += x_step; */
	addl	24(%ebp),%ebx
/* x_scaled = x >> 16; */
	movl	%ebx,%edx
	sarl	$16,%edx

	/* Same source pixel index as before: reuse %mm0-%mm3 */
	cmpl	%edx,-24(%ebp)
	je	.Lloop

.Lnewx:
	movl	%edx,-24(%ebp)		/* remember the new x_scaled */
/*
 * Load the two new values into %mm1, %mm3, move old values into %mm0, %mm2
 *
 * Requires %mm4 == 0 (see the setup code and the rounding step).
 */
	movq	%mm1, %mm0
	movq	%mm3, %mm2

	leal	(%edx,%edx,2),%edx	/* Multiply by 3 */

	movl	16(%ebp),%edi		/* q1 */
	movzbl	2(%edi,%edx),%ecx
	shll	$16,%ecx
	movzwl	(%edi,%edx),%eax
	orl	%eax,%ecx
	movd	%ecx, %mm1
	punpcklbw %mm4, %mm1

	movl	20(%ebp),%edi		/* q2 */
	movzbl	2(%edi,%edx),%ecx
	shll	$16,%ecx
	movzwl	(%edi,%edx),%eax
	orl	%eax,%ecx
	movd	%ecx, %mm3
	punpcklbw %mm4, %mm3

	movl	8(%ebp),%edi		/* %edi = weights, as .Lloop expects */

	jmp	.Lloop

.Lout:
	movl	%esi,%eax		/* return value: destination pointer on exit */
	emms				/* leave MMX state before returning */
	leal	-40(%ebp),%esp		/* point %esp at the saved %ebx */
	popl	%ebx
	popl	%esi
	popl	%edi
	movl	%ebp,%esp
	popl	%ebp
	ret

#if !defined(__MINGW32__) && !defined(__CYGWIN__) && !defined(__INTERIX)
	.size	_pixops_scale_line_22_33_mmx, .-_pixops_scale_line_22_33_mmx
#endif
|