mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-30 08:40:07 +00:00
4132 lines
75 KiB
ArmAsm
4132 lines
75 KiB
ArmAsm
/* strcpy with SSSE3
|
|
Copyright (C) 2011 Free Software Foundation, Inc.
|
|
Contributed by Intel Corporation.
|
|
This file is part of the GNU C Library.
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with the GNU C Library; if not, write to the Free
|
|
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
02111-1307 USA. */
|
|
|
|
|
|
#ifndef NOT_IN_libc
|
|
# ifndef USE_AS_STRCAT
|
|
# include <sysdep.h>
|
|
|
|
/* CFI_PUSH (REG): unwind annotations that accompany a 4-byte push of REG
   (it is paired with pushl in the PUSH macro): the CFA offset is
   adjusted by +4 and REG is recorded at relative offset 0.
   The cfi_* names are not defined in this file; presumably they are
   provided by <sysdep.h> -- confirm there.  */
# define CFI_PUSH(REG) \
|
|
cfi_adjust_cfa_offset (4); \
|
|
cfi_rel_offset (REG, 0)
|
|
|
|
/* CFI_POP (REG): unwind annotations that accompany a 4-byte pop of REG
   (it is paired with popl in the POP macro): the CFA offset is
   adjusted by -4 and REG is marked as restored.
   The cfi_* names are not defined in this file; presumably they are
   provided by <sysdep.h> -- confirm there.  */
# define CFI_POP(REG) \
|
|
cfi_adjust_cfa_offset (-4); \
|
|
cfi_restore (REG)
|
|
|
|
/* PUSH (REG) / POP (REG): pushl / popl of REG immediately followed by the
   matching CFI_PUSH / CFI_POP annotation, so the stack change and its
   unwind description are always emitted together.  */
# define PUSH(REG) pushl REG; CFI_PUSH (REG)
|
|
# define POP(REG) popl REG; CFI_POP (REG)
|
|
|
|
/* Name of the function defined by ENTRY (STRCPY) below.  It defaults to
   __strcpy_ssse3 unless STRCPY was already defined before this point.  */
# ifndef STRCPY
|
|
# define STRCPY __strcpy_ssse3
|
|
# endif
|
|
|
|
/* Entry and exit sequences, selected by USE_AS_STRNCPY.

   PARMS    - offset from %esp of the first stack argument after ENTRANCE
	      has run: 8 when ENTRANCE pushes %ebx, 4 otherwise
	      (presumably the remaining 4 bytes are the return address).
   ENTRANCE - prologue.  With USE_AS_STRNCPY it saves %ebx, which the
	      function then loads with LEN(%esp); otherwise it is empty.
   RETURN   - return that undoes ENTRANCE.
   RETURN1  - return that pops %edi first, then undoes ENTRANCE; for
	      paths reached after PUSH (%edi).

   NOTE(review): the CFI_PUSH annotations placed after each ret appear to
   re-establish the unwind state for the code that follows the ret in the
   instruction stream, which is still reached with those registers
   pushed -- confirm against the CFI macro definitions.  */
# ifdef USE_AS_STRNCPY
|
|
# define PARMS 8
|
|
# define ENTRANCE PUSH(%ebx)
|
|
# define RETURN POP(%ebx); ret; CFI_PUSH(%ebx);
|
|
# define RETURN1 POP(%edi); POP(%ebx); ret; CFI_PUSH(%ebx); CFI_PUSH(%edi)
|
|
# else
|
|
# define PARMS 4
|
|
# define ENTRANCE
|
|
# define RETURN ret
|
|
# define RETURN1 POP(%edi); ret; CFI_PUSH(%edi)
|
|
# endif
|
|
|
|
/* Offsets of the stack arguments relative to %esp after ENTRANCE:
   STR1 - first argument; loaded into %edx and used as the store address
	  (destination).
   STR2 - second argument; loaded into %ecx and used as the load address
	  (source).
   LEN  - third argument; only read (into %ebx) when USE_AS_STRNCPY is
	  defined.  */
# define STR1 PARMS
|
|
# define STR2 STR1+4
|
|
# define LEN STR2+4
|
|
|
|
/* In this code the following instructions are used for copying:
|
|
movb - 1 byte
|
|
movw - 2 byte
|
|
movl - 4 byte
|
|
movlpd - 8 byte
|
|
movaps - 16 byte - requires 16 byte alignment
|
|
of source and destination addresses.
|
|
16 byte alignment: the address is a 32-bit value,
|
|
the low four bits of the address shall be 0.
|
|
*/
|
|
|
|
.text
|
|
ENTRY (STRCPY)
|
|
ENTRANCE
|
|
mov STR1(%esp), %edx
|
|
mov STR2(%esp), %ecx
|
|
# ifdef USE_AS_STRNCPY
|
|
movl LEN(%esp), %ebx
|
|
test %ebx, %ebx
|
|
jz L(ExitTail0)
|
|
cmp $8, %ebx
|
|
jbe L(StrncpyExit8Bytes)
|
|
# endif
|
|
cmpb $0, (%ecx)
|
|
jz L(ExitTail1)
|
|
cmpb $0, 1(%ecx)
|
|
jz L(ExitTail2)
|
|
cmpb $0, 2(%ecx)
|
|
jz L(ExitTail3)
|
|
cmpb $0, 3(%ecx)
|
|
jz L(ExitTail4)
|
|
cmpb $0, 4(%ecx)
|
|
jz L(ExitTail5)
|
|
cmpb $0, 5(%ecx)
|
|
jz L(ExitTail6)
|
|
cmpb $0, 6(%ecx)
|
|
jz L(ExitTail7)
|
|
cmpb $0, 7(%ecx)
|
|
jz L(ExitTail8)
|
|
# ifdef USE_AS_STRNCPY
|
|
cmp $16, %ebx
|
|
jb L(StrncpyExit15Bytes)
|
|
# endif
|
|
cmpb $0, 8(%ecx)
|
|
jz L(ExitTail9)
|
|
cmpb $0, 9(%ecx)
|
|
jz L(ExitTail10)
|
|
cmpb $0, 10(%ecx)
|
|
jz L(ExitTail11)
|
|
cmpb $0, 11(%ecx)
|
|
jz L(ExitTail12)
|
|
cmpb $0, 12(%ecx)
|
|
jz L(ExitTail13)
|
|
cmpb $0, 13(%ecx)
|
|
jz L(ExitTail14)
|
|
cmpb $0, 14(%ecx)
|
|
jz L(ExitTail15)
|
|
# ifdef USE_AS_STRNCPY
|
|
cmp $16, %ebx
|
|
je L(ExitTail16)
|
|
# endif
|
|
cmpb $0, 15(%ecx)
|
|
jz L(ExitTail16)
|
|
|
|
PUSH (%edi)
|
|
mov %edx, %edi
|
|
# endif
|
|
PUSH (%esi)
|
|
# ifdef USE_AS_STRNCPY
|
|
mov %ecx, %esi
|
|
sub $16, %ebx
|
|
and $0xf, %esi
|
|
|
|
/* ebx += ecx_shift (esi = ecx & 0xf); 16 was already subtracted from ebx above */
|
|
|
|
add %esi, %ebx
|
|
# endif
|
|
lea 16(%ecx), %esi
|
|
/* Now:
|
|
esi = alignment_16(ecx) + ecx_shift + 16;
|
|
ecx_shift = ecx - alignment_16(ecx)
|
|
*/
|
|
and $-16, %esi
|
|
/* Now:
|
|
esi = alignment_16(ecx) + 16
|
|
*/
|
|
pxor %xmm0, %xmm0
|
|
movlpd (%ecx), %xmm1
|
|
movlpd %xmm1, (%edx)
|
|
/*
|
|
check whether there is a zero byte in the next 16 bytes of the string
|
|
from esi to esi + 15 and form mask in xmm0
|
|
*/
|
|
pcmpeqb (%esi), %xmm0
|
|
movlpd 8(%ecx), %xmm1
|
|
movlpd %xmm1, 8(%edx)
|
|
|
|
/* convert byte mask in xmm0 to bit mask */
|
|
|
|
pmovmskb %xmm0, %eax
|
|
sub %ecx, %esi
|
|
|
|
/* esi = 16 - ecx_shift */
|
|
|
|
/* eax = 0: there isn't end of string from position esi to esi+15 */
|
|
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(CopyFrom1To16BytesCase2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
mov %edx, %eax
|
|
lea 16(%edx), %edx
|
|
/* Now:
|
|
edx = edx + 16 = alignment_16(edx) + edx_shift + 16
|
|
*/
|
|
and $-16, %edx
|
|
|
|
/* Now: edx = alignment_16(edx) + 16 */
|
|
|
|
sub %edx, %eax
|
|
|
|
/* Now: eax = edx_shift - 16 */
|
|
|
|
# ifdef USE_AS_STRNCPY
|
|
add %eax, %esi
|
|
lea -1(%esi), %esi
|
|
and $1<<31, %esi
|
|
test %esi, %esi
|
|
jnz L(ContinueCopy)
|
|
lea 16(%ebx), %ebx
|
|
|
|
L(ContinueCopy):
|
|
# endif
|
|
sub %eax, %ecx
|
|
/* Now:
|
|
case ecx_shift >= edx_shift:
|
|
ecx = alignment_16(ecx) + (ecx_shift - edx_shift) + 16
|
|
case ecx_shift < edx_shift:
|
|
ecx = alignment_16(ecx) + (16 + ecx_shift - edx_shift)
|
|
*/
|
|
mov %ecx, %eax
|
|
and $0xf, %eax
|
|
/* Now:
|
|
case ecx_shift >= edx_shift: eax = ecx_shift - edx_shift
|
|
case ecx_shift < edx_shift: eax = (16 + ecx_shift - edx_shift)
|
|
eax can be 0, 1, ..., 15
|
|
*/
|
|
mov $0, %esi
|
|
|
|
/* case: ecx_shift == edx_shift */
|
|
|
|
jz L(Align16Both)
|
|
|
|
cmp $8, %eax
|
|
jae L(ShlHigh8)
|
|
cmp $1, %eax
|
|
je L(Shl1)
|
|
cmp $2, %eax
|
|
je L(Shl2)
|
|
cmp $3, %eax
|
|
je L(Shl3)
|
|
cmp $4, %eax
|
|
je L(Shl4)
|
|
cmp $5, %eax
|
|
je L(Shl5)
|
|
cmp $6, %eax
|
|
je L(Shl6)
|
|
jmp L(Shl7)
|
|
|
|
L(ShlHigh8):
|
|
je L(Shl8)
|
|
cmp $9, %eax
|
|
je L(Shl9)
|
|
cmp $10, %eax
|
|
je L(Shl10)
|
|
cmp $11, %eax
|
|
je L(Shl11)
|
|
cmp $12, %eax
|
|
je L(Shl12)
|
|
cmp $13, %eax
|
|
je L(Shl13)
|
|
cmp $14, %eax
|
|
je L(Shl14)
|
|
jmp L(Shl15)
|
|
|
|
L(Align16Both):
|
|
movaps (%ecx), %xmm1
|
|
movaps 16(%ecx), %xmm2
|
|
movaps %xmm1, (%edx)
|
|
pcmpeqb %xmm2, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%esi), %esi
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(CopyFrom1To16BytesCase2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
movaps 16(%ecx, %esi), %xmm3
|
|
movaps %xmm2, (%edx, %esi)
|
|
pcmpeqb %xmm3, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%esi), %esi
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(CopyFrom1To16BytesCase2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
movaps 16(%ecx, %esi), %xmm4
|
|
movaps %xmm3, (%edx, %esi)
|
|
pcmpeqb %xmm4, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%esi), %esi
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(CopyFrom1To16BytesCase2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
movaps 16(%ecx, %esi), %xmm1
|
|
movaps %xmm4, (%edx, %esi)
|
|
pcmpeqb %xmm1, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%esi), %esi
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(CopyFrom1To16BytesCase2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
movaps 16(%ecx, %esi), %xmm2
|
|
movaps %xmm1, (%edx, %esi)
|
|
pcmpeqb %xmm2, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%esi), %esi
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(CopyFrom1To16BytesCase2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
movaps 16(%ecx, %esi), %xmm3
|
|
movaps %xmm2, (%edx, %esi)
|
|
pcmpeqb %xmm3, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%esi), %esi
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(CopyFrom1To16BytesCase2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
movaps %xmm3, (%edx, %esi)
|
|
mov %ecx, %eax
|
|
lea 16(%ecx, %esi), %ecx
|
|
and $-0x40, %ecx
|
|
sub %ecx, %eax
|
|
sub %eax, %edx
|
|
# ifdef USE_AS_STRNCPY
|
|
lea 48+64(%ebx, %eax), %ebx
|
|
# endif
|
|
mov $-0x40, %esi
|
|
|
|
L(Aligned64Loop):
|
|
movaps (%ecx), %xmm2
|
|
movaps 32(%ecx), %xmm3
|
|
movaps %xmm2, %xmm4
|
|
movaps 16(%ecx), %xmm5
|
|
movaps %xmm3, %xmm6
|
|
movaps 48(%ecx), %xmm7
|
|
pminub %xmm5, %xmm2
|
|
pminub %xmm7, %xmm3
|
|
pminub %xmm2, %xmm3
|
|
lea 64(%edx), %edx
|
|
pcmpeqb %xmm0, %xmm3
|
|
lea 64(%ecx), %ecx
|
|
pmovmskb %xmm3, %eax
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $64, %ebx
|
|
jbe L(StrncpyLeaveCase2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Aligned64Leave)
|
|
movaps %xmm4, -64(%edx)
|
|
movaps %xmm5, -48(%edx)
|
|
movaps %xmm6, -32(%edx)
|
|
movaps %xmm7, -16(%edx)
|
|
jmp L(Aligned64Loop)
|
|
|
|
L(Aligned64Leave):
|
|
# ifdef USE_AS_STRNCPY
|
|
lea 48(%ebx), %ebx
|
|
# endif
|
|
pcmpeqb %xmm4, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
pcmpeqb %xmm5, %xmm0
|
|
# ifdef USE_AS_STRNCPY
|
|
lea -16(%ebx), %ebx
|
|
# endif
|
|
pmovmskb %xmm0, %eax
|
|
movaps %xmm4, -64(%edx)
|
|
test %eax, %eax
|
|
lea 16(%esi), %esi
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
pcmpeqb %xmm6, %xmm0
|
|
# ifdef USE_AS_STRNCPY
|
|
lea -16(%ebx), %ebx
|
|
# endif
|
|
pmovmskb %xmm0, %eax
|
|
movaps %xmm5, -48(%edx)
|
|
test %eax, %eax
|
|
lea 16(%esi), %esi
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
movaps %xmm6, -32(%edx)
|
|
pcmpeqb %xmm7, %xmm0
|
|
# ifdef USE_AS_STRNCPY
|
|
lea -16(%ebx), %ebx
|
|
# endif
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%esi), %esi
|
|
jmp L(CopyFrom1To16Bytes)
|
|
|
|
.p2align 4
|
|
L(Shl1):
|
|
movaps -1(%ecx), %xmm1
|
|
movaps 15(%ecx), %xmm2
|
|
L(Shl1Start):
|
|
pcmpeqb %xmm2, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit1Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl1LoopExit)
|
|
|
|
palignr $1, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 31(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit1Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl1LoopExit)
|
|
|
|
palignr $1, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 31(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit1Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl1LoopExit)
|
|
|
|
palignr $1, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 31(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit1Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl1LoopExit)
|
|
|
|
palignr $1, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
lea 31(%ecx), %ecx
|
|
lea 16(%edx), %edx
|
|
|
|
mov %ecx, %eax
|
|
and $-0x40, %ecx
|
|
sub %ecx, %eax
|
|
lea -15(%ecx), %ecx
|
|
sub %eax, %edx
|
|
# ifdef USE_AS_STRNCPY
|
|
add %eax, %ebx
|
|
# endif
|
|
movaps -1(%ecx), %xmm1
|
|
|
|
L(Shl1LoopStart):
|
|
movaps 15(%ecx), %xmm2
|
|
movaps 31(%ecx), %xmm3
|
|
movaps %xmm3, %xmm6
|
|
movaps 47(%ecx), %xmm4
|
|
movaps %xmm4, %xmm7
|
|
movaps 63(%ecx), %xmm5
|
|
pminub %xmm2, %xmm6
|
|
pminub %xmm5, %xmm7
|
|
pminub %xmm6, %xmm7
|
|
pcmpeqb %xmm0, %xmm7
|
|
pmovmskb %xmm7, %eax
|
|
movaps %xmm5, %xmm7
|
|
palignr $1, %xmm4, %xmm5
|
|
test %eax, %eax
|
|
palignr $1, %xmm3, %xmm4
|
|
jnz L(Shl1Start)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $64, %ebx
|
|
jbe L(StrncpyLeave1)
|
|
# endif
|
|
palignr $1, %xmm2, %xmm3
|
|
lea 64(%ecx), %ecx
|
|
palignr $1, %xmm1, %xmm2
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
movaps %xmm4, 32(%edx)
|
|
movaps %xmm3, 16(%edx)
|
|
movaps %xmm2, (%edx)
|
|
lea 64(%edx), %edx
|
|
jmp L(Shl1LoopStart)
|
|
|
|
L(Shl1LoopExit):
|
|
movaps (%edx), %xmm6
|
|
psrldq $15, %xmm6
|
|
mov $15, %esi
|
|
palignr $1, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx)
|
|
jmp L(CopyFrom1To16Bytes)
|
|
|
|
.p2align 4
|
|
L(Shl2):
|
|
movaps -2(%ecx), %xmm1
|
|
movaps 14(%ecx), %xmm2
|
|
L(Shl2Start):
|
|
pcmpeqb %xmm2, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit2Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl2LoopExit)
|
|
|
|
palignr $2, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 30(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit2Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl2LoopExit)
|
|
|
|
palignr $2, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 30(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit2Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl2LoopExit)
|
|
|
|
palignr $2, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 30(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit2Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl2LoopExit)
|
|
|
|
palignr $2, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
lea 30(%ecx), %ecx
|
|
lea 16(%edx), %edx
|
|
|
|
mov %ecx, %eax
|
|
and $-0x40, %ecx
|
|
sub %ecx, %eax
|
|
lea -14(%ecx), %ecx
|
|
sub %eax, %edx
|
|
# ifdef USE_AS_STRNCPY
|
|
add %eax, %ebx
|
|
# endif
|
|
movaps -2(%ecx), %xmm1
|
|
|
|
L(Shl2LoopStart):
|
|
movaps 14(%ecx), %xmm2
|
|
movaps 30(%ecx), %xmm3
|
|
movaps %xmm3, %xmm6
|
|
movaps 46(%ecx), %xmm4
|
|
movaps %xmm4, %xmm7
|
|
movaps 62(%ecx), %xmm5
|
|
pminub %xmm2, %xmm6
|
|
pminub %xmm5, %xmm7
|
|
pminub %xmm6, %xmm7
|
|
pcmpeqb %xmm0, %xmm7
|
|
pmovmskb %xmm7, %eax
|
|
movaps %xmm5, %xmm7
|
|
palignr $2, %xmm4, %xmm5
|
|
test %eax, %eax
|
|
palignr $2, %xmm3, %xmm4
|
|
jnz L(Shl2Start)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $64, %ebx
|
|
jbe L(StrncpyLeave2)
|
|
# endif
|
|
palignr $2, %xmm2, %xmm3
|
|
lea 64(%ecx), %ecx
|
|
palignr $2, %xmm1, %xmm2
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
movaps %xmm4, 32(%edx)
|
|
movaps %xmm3, 16(%edx)
|
|
movaps %xmm2, (%edx)
|
|
lea 64(%edx), %edx
|
|
jmp L(Shl2LoopStart)
|
|
|
|
L(Shl2LoopExit):
|
|
movaps (%edx), %xmm6
|
|
psrldq $14, %xmm6
|
|
mov $14, %esi
|
|
palignr $2, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx)
|
|
jmp L(CopyFrom1To16Bytes)
|
|
|
|
.p2align 4
|
|
L(Shl3):
|
|
movaps -3(%ecx), %xmm1
|
|
movaps 13(%ecx), %xmm2
|
|
L(Shl3Start):
|
|
pcmpeqb %xmm2, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit3Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl3LoopExit)
|
|
|
|
palignr $3, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 29(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit3Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl3LoopExit)
|
|
|
|
palignr $3, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 29(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit3Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl3LoopExit)
|
|
|
|
palignr $3, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 29(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit3Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl3LoopExit)
|
|
|
|
palignr $3, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
lea 29(%ecx), %ecx
|
|
lea 16(%edx), %edx
|
|
|
|
mov %ecx, %eax
|
|
and $-0x40, %ecx
|
|
sub %ecx, %eax
|
|
lea -13(%ecx), %ecx
|
|
sub %eax, %edx
|
|
# ifdef USE_AS_STRNCPY
|
|
add %eax, %ebx
|
|
# endif
|
|
movaps -3(%ecx), %xmm1
|
|
|
|
L(Shl3LoopStart):
|
|
movaps 13(%ecx), %xmm2
|
|
movaps 29(%ecx), %xmm3
|
|
movaps %xmm3, %xmm6
|
|
movaps 45(%ecx), %xmm4
|
|
movaps %xmm4, %xmm7
|
|
movaps 61(%ecx), %xmm5
|
|
pminub %xmm2, %xmm6
|
|
pminub %xmm5, %xmm7
|
|
pminub %xmm6, %xmm7
|
|
pcmpeqb %xmm0, %xmm7
|
|
pmovmskb %xmm7, %eax
|
|
movaps %xmm5, %xmm7
|
|
palignr $3, %xmm4, %xmm5
|
|
test %eax, %eax
|
|
palignr $3, %xmm3, %xmm4
|
|
jnz L(Shl3Start)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $64, %ebx
|
|
jbe L(StrncpyLeave3)
|
|
# endif
|
|
palignr $3, %xmm2, %xmm3
|
|
lea 64(%ecx), %ecx
|
|
palignr $3, %xmm1, %xmm2
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
movaps %xmm4, 32(%edx)
|
|
movaps %xmm3, 16(%edx)
|
|
movaps %xmm2, (%edx)
|
|
lea 64(%edx), %edx
|
|
jmp L(Shl3LoopStart)
|
|
|
|
L(Shl3LoopExit):
|
|
movaps (%edx), %xmm6
|
|
psrldq $13, %xmm6
|
|
mov $13, %esi
|
|
palignr $3, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx)
|
|
jmp L(CopyFrom1To16Bytes)
|
|
|
|
.p2align 4
|
|
L(Shl4):
|
|
movaps -4(%ecx), %xmm1
|
|
movaps 12(%ecx), %xmm2
|
|
L(Shl4Start):
|
|
pcmpeqb %xmm2, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit4Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl4LoopExit)
|
|
|
|
palignr $4, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 28(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit4Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl4LoopExit)
|
|
|
|
palignr $4, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 28(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit4Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl4LoopExit)
|
|
|
|
palignr $4, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 28(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit4Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl4LoopExit)
|
|
|
|
palignr $4, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
lea 28(%ecx), %ecx
|
|
lea 16(%edx), %edx
|
|
|
|
mov %ecx, %eax
|
|
and $-0x40, %ecx
|
|
sub %ecx, %eax
|
|
lea -12(%ecx), %ecx
|
|
sub %eax, %edx
|
|
# ifdef USE_AS_STRNCPY
|
|
add %eax, %ebx
|
|
# endif
|
|
movaps -4(%ecx), %xmm1
|
|
|
|
L(Shl4LoopStart):
|
|
movaps 12(%ecx), %xmm2
|
|
movaps 28(%ecx), %xmm3
|
|
movaps %xmm3, %xmm6
|
|
movaps 44(%ecx), %xmm4
|
|
movaps %xmm4, %xmm7
|
|
movaps 60(%ecx), %xmm5
|
|
pminub %xmm2, %xmm6
|
|
pminub %xmm5, %xmm7
|
|
pminub %xmm6, %xmm7
|
|
pcmpeqb %xmm0, %xmm7
|
|
pmovmskb %xmm7, %eax
|
|
movaps %xmm5, %xmm7
|
|
palignr $4, %xmm4, %xmm5
|
|
test %eax, %eax
|
|
palignr $4, %xmm3, %xmm4
|
|
jnz L(Shl4Start)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $64, %ebx
|
|
jbe L(StrncpyLeave4)
|
|
# endif
|
|
palignr $4, %xmm2, %xmm3
|
|
lea 64(%ecx), %ecx
|
|
palignr $4, %xmm1, %xmm2
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
movaps %xmm4, 32(%edx)
|
|
movaps %xmm3, 16(%edx)
|
|
movaps %xmm2, (%edx)
|
|
lea 64(%edx), %edx
|
|
jmp L(Shl4LoopStart)
|
|
|
|
L(Shl4LoopExit):
|
|
movaps (%edx), %xmm6
|
|
psrldq $12, %xmm6
|
|
mov $12, %esi
|
|
palignr $4, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx)
|
|
jmp L(CopyFrom1To16Bytes)
|
|
|
|
.p2align 4
|
|
L(Shl5):
|
|
movaps -5(%ecx), %xmm1
|
|
movaps 11(%ecx), %xmm2
|
|
L(Shl5Start):
|
|
pcmpeqb %xmm2, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit5Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl5LoopExit)
|
|
|
|
palignr $5, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 27(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit5Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl5LoopExit)
|
|
|
|
palignr $5, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 27(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit5Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl5LoopExit)
|
|
|
|
palignr $5, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 27(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit5Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl5LoopExit)
|
|
|
|
palignr $5, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
lea 27(%ecx), %ecx
|
|
lea 16(%edx), %edx
|
|
|
|
mov %ecx, %eax
|
|
and $-0x40, %ecx
|
|
sub %ecx, %eax
|
|
lea -11(%ecx), %ecx
|
|
sub %eax, %edx
|
|
# ifdef USE_AS_STRNCPY
|
|
add %eax, %ebx
|
|
# endif
|
|
movaps -5(%ecx), %xmm1
|
|
|
|
L(Shl5LoopStart):
|
|
movaps 11(%ecx), %xmm2
|
|
movaps 27(%ecx), %xmm3
|
|
movaps %xmm3, %xmm6
|
|
movaps 43(%ecx), %xmm4
|
|
movaps %xmm4, %xmm7
|
|
movaps 59(%ecx), %xmm5
|
|
pminub %xmm2, %xmm6
|
|
pminub %xmm5, %xmm7
|
|
pminub %xmm6, %xmm7
|
|
pcmpeqb %xmm0, %xmm7
|
|
pmovmskb %xmm7, %eax
|
|
movaps %xmm5, %xmm7
|
|
palignr $5, %xmm4, %xmm5
|
|
test %eax, %eax
|
|
palignr $5, %xmm3, %xmm4
|
|
jnz L(Shl5Start)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $64, %ebx
|
|
jbe L(StrncpyLeave5)
|
|
# endif
|
|
palignr $5, %xmm2, %xmm3
|
|
lea 64(%ecx), %ecx
|
|
palignr $5, %xmm1, %xmm2
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
movaps %xmm4, 32(%edx)
|
|
movaps %xmm3, 16(%edx)
|
|
movaps %xmm2, (%edx)
|
|
lea 64(%edx), %edx
|
|
jmp L(Shl5LoopStart)
|
|
|
|
L(Shl5LoopExit):
|
|
movaps (%edx), %xmm6
|
|
psrldq $11, %xmm6
|
|
mov $11, %esi
|
|
palignr $5, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx)
|
|
jmp L(CopyFrom1To16Bytes)
|
|
|
|
.p2align 4
|
|
L(Shl6):
|
|
movaps -6(%ecx), %xmm1
|
|
movaps 10(%ecx), %xmm2
|
|
L(Shl6Start):
|
|
pcmpeqb %xmm2, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit6Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl6LoopExit)
|
|
|
|
palignr $6, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 26(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit6Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl6LoopExit)
|
|
|
|
palignr $6, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 26(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit6Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl6LoopExit)
|
|
|
|
palignr $6, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 26(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit6Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl6LoopExit)
|
|
|
|
palignr $6, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
lea 26(%ecx), %ecx
|
|
lea 16(%edx), %edx
|
|
|
|
mov %ecx, %eax
|
|
and $-0x40, %ecx
|
|
sub %ecx, %eax
|
|
lea -10(%ecx), %ecx
|
|
sub %eax, %edx
|
|
# ifdef USE_AS_STRNCPY
|
|
add %eax, %ebx
|
|
# endif
|
|
movaps -6(%ecx), %xmm1
|
|
|
|
L(Shl6LoopStart):
|
|
movaps 10(%ecx), %xmm2
|
|
movaps 26(%ecx), %xmm3
|
|
movaps %xmm3, %xmm6
|
|
movaps 42(%ecx), %xmm4
|
|
movaps %xmm4, %xmm7
|
|
movaps 58(%ecx), %xmm5
|
|
pminub %xmm2, %xmm6
|
|
pminub %xmm5, %xmm7
|
|
pminub %xmm6, %xmm7
|
|
pcmpeqb %xmm0, %xmm7
|
|
pmovmskb %xmm7, %eax
|
|
movaps %xmm5, %xmm7
|
|
palignr $6, %xmm4, %xmm5
|
|
test %eax, %eax
|
|
palignr $6, %xmm3, %xmm4
|
|
jnz L(Shl6Start)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $64, %ebx
|
|
jbe L(StrncpyLeave6)
|
|
# endif
|
|
palignr $6, %xmm2, %xmm3
|
|
lea 64(%ecx), %ecx
|
|
palignr $6, %xmm1, %xmm2
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
movaps %xmm4, 32(%edx)
|
|
movaps %xmm3, 16(%edx)
|
|
movaps %xmm2, (%edx)
|
|
lea 64(%edx), %edx
|
|
jmp L(Shl6LoopStart)
|
|
|
|
L(Shl6LoopExit):
|
|
movaps (%edx), %xmm6
|
|
psrldq $10, %xmm6
|
|
mov $10, %esi
|
|
palignr $6, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx)
|
|
jmp L(CopyFrom1To16Bytes)
|
|
|
|
.p2align 4
|
|
L(Shl7):
|
|
movaps -7(%ecx), %xmm1
|
|
movaps 9(%ecx), %xmm2
|
|
L(Shl7Start):
|
|
pcmpeqb %xmm2, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit7Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl7LoopExit)
|
|
|
|
palignr $7, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 25(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit7Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl7LoopExit)
|
|
|
|
palignr $7, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 25(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit7Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl7LoopExit)
|
|
|
|
palignr $7, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 25(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit7Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl7LoopExit)
|
|
|
|
palignr $7, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
lea 25(%ecx), %ecx
|
|
lea 16(%edx), %edx
|
|
|
|
mov %ecx, %eax
|
|
and $-0x40, %ecx
|
|
sub %ecx, %eax
|
|
lea -9(%ecx), %ecx
|
|
sub %eax, %edx
|
|
# ifdef USE_AS_STRNCPY
|
|
add %eax, %ebx
|
|
# endif
|
|
movaps -7(%ecx), %xmm1
|
|
|
|
L(Shl7LoopStart):
|
|
movaps 9(%ecx), %xmm2
|
|
movaps 25(%ecx), %xmm3
|
|
movaps %xmm3, %xmm6
|
|
movaps 41(%ecx), %xmm4
|
|
movaps %xmm4, %xmm7
|
|
movaps 57(%ecx), %xmm5
|
|
pminub %xmm2, %xmm6
|
|
pminub %xmm5, %xmm7
|
|
pminub %xmm6, %xmm7
|
|
pcmpeqb %xmm0, %xmm7
|
|
pmovmskb %xmm7, %eax
|
|
movaps %xmm5, %xmm7
|
|
palignr $7, %xmm4, %xmm5
|
|
test %eax, %eax
|
|
palignr $7, %xmm3, %xmm4
|
|
jnz L(Shl7Start)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $64, %ebx
|
|
jbe L(StrncpyLeave7)
|
|
# endif
|
|
palignr $7, %xmm2, %xmm3
|
|
lea 64(%ecx), %ecx
|
|
palignr $7, %xmm1, %xmm2
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
movaps %xmm4, 32(%edx)
|
|
movaps %xmm3, 16(%edx)
|
|
movaps %xmm2, (%edx)
|
|
lea 64(%edx), %edx
|
|
jmp L(Shl7LoopStart)
|
|
|
|
L(Shl7LoopExit):
|
|
movaps (%edx), %xmm6
|
|
psrldq $9, %xmm6
|
|
mov $9, %esi
|
|
palignr $7, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx)
|
|
jmp L(CopyFrom1To16Bytes)
|
|
|
|
.p2align 4
|
|
L(Shl8):
|
|
movaps -8(%ecx), %xmm1
|
|
movaps 8(%ecx), %xmm2
|
|
L(Shl8Start):
|
|
pcmpeqb %xmm2, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit8Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl8LoopExit)
|
|
|
|
palignr $8, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 24(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit8Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl8LoopExit)
|
|
|
|
palignr $8, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 24(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit8Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl8LoopExit)
|
|
|
|
palignr $8, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 24(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit8Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl8LoopExit)
|
|
|
|
palignr $8, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
lea 24(%ecx), %ecx
|
|
lea 16(%edx), %edx
|
|
|
|
mov %ecx, %eax
|
|
and $-0x40, %ecx
|
|
sub %ecx, %eax
|
|
lea -8(%ecx), %ecx
|
|
sub %eax, %edx
|
|
# ifdef USE_AS_STRNCPY
|
|
add %eax, %ebx
|
|
# endif
|
|
movaps -8(%ecx), %xmm1
|
|
|
|
L(Shl8LoopStart):
|
|
movaps 8(%ecx), %xmm2
|
|
movaps 24(%ecx), %xmm3
|
|
movaps %xmm3, %xmm6
|
|
movaps 40(%ecx), %xmm4
|
|
movaps %xmm4, %xmm7
|
|
movaps 56(%ecx), %xmm5
|
|
pminub %xmm2, %xmm6
|
|
pminub %xmm5, %xmm7
|
|
pminub %xmm6, %xmm7
|
|
pcmpeqb %xmm0, %xmm7
|
|
pmovmskb %xmm7, %eax
|
|
movaps %xmm5, %xmm7
|
|
palignr $8, %xmm4, %xmm5
|
|
test %eax, %eax
|
|
palignr $8, %xmm3, %xmm4
|
|
jnz L(Shl8Start)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $64, %ebx
|
|
jbe L(StrncpyLeave8)
|
|
# endif
|
|
palignr $8, %xmm2, %xmm3
|
|
lea 64(%ecx), %ecx
|
|
palignr $8, %xmm1, %xmm2
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
movaps %xmm4, 32(%edx)
|
|
movaps %xmm3, 16(%edx)
|
|
movaps %xmm2, (%edx)
|
|
lea 64(%edx), %edx
|
|
jmp L(Shl8LoopStart)
|
|
|
|
L(Shl8LoopExit):
|
|
movaps (%edx), %xmm6
|
|
psrldq $8, %xmm6
|
|
mov $8, %esi
|
|
palignr $8, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx)
|
|
jmp L(CopyFrom1To16Bytes)
|
|
|
|
.p2align 4
|
|
/* Shift-by-9 copy path.  Every memory operand below is accessed with
   movaps, so both (%ecx - 9) and %edx have to be 16-byte aligned here.
   Each step loads the next aligned source block; palignr $9 joins the
   top 7 bytes of the previous block (%xmm1) with the low 9 bytes of the
   new one (%xmm2) to form one aligned 16-byte store to (%edx).
   pcmpeqb against %xmm0 followed by pmovmskb yields in %eax a per-byte
   match mask for the new block; a non-zero mask leaves through
   L(Shl9LoopExit).
   NOTE(review): this is a NUL test only if %xmm0 is all-zero on entry,
   which is set up outside this chunk -- confirm.
   With USE_AS_STRNCPY, %ebx is decreased by 16 per block and the path
   leaves through L(StrncpyExit9Case2OrCase3) as soon as that
   subtraction reaches zero or borrows.  */
L(Shl9):
|
|
movaps -9(%ecx), %xmm1
|
|
movaps 7(%ecx), %xmm2
|
|
/* Also re-entered from L(Shl9LoopStart) with %xmm1/%xmm2 already loaded.  */
L(Shl9Start):
|
|
pcmpeqb %xmm2, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
/* Keep an unshifted copy of the new block; it becomes %xmm1 next step.  */
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit9Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl9LoopExit)
|
|
|
|
/* Step 1: no match, store the spliced block and load the next one.  */
palignr $9, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 23(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit9Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl9LoopExit)
|
|
|
|
/* Step 2.  */
palignr $9, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 23(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit9Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl9LoopExit)
|
|
|
|
/* Step 3.  */
palignr $9, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 23(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit9Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl9LoopExit)
|
|
|
|
/* Step 4: store the last spliced block, then prepare the 64-byte loop.  */
palignr $9, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
/* %ecx = address of the next aligned source block.  */
lea 23(%ecx), %ecx
|
|
lea 16(%edx), %edx
|
|
|
|
/* Round that address down to a multiple of 64.  %eax receives the
   distance moved back (0, 16, 32 or 48); %edx is moved back by the same
   amount (and %ebx increased by it for strncpy), so the loop may store
   again bytes that were already copied.  Afterwards 7(%ecx) is 64-byte
   aligned.  */
mov %ecx, %eax
|
|
and $-0x40, %ecx
|
|
sub %ecx, %eax
|
|
lea -7(%ecx), %ecx
|
|
sub %eax, %edx
|
|
# ifdef USE_AS_STRNCPY
|
|
add %eax, %ebx
|
|
# endif
|
|
/* Reload the block preceding 7(%ecx) as the "previous block".  */
movaps -9(%ecx), %xmm1
|
|
|
|
/* Main loop of the shift-by-9 path: 64 source bytes per iteration.
   %xmm1 holds the aligned block at -9(%ecx).  Four aligned blocks are
   loaded into %xmm2..%xmm5 and their byte-wise unsigned minimum is
   compared with %xmm0; %eax receives the resulting byte mask.
   NOTE(review): %xmm0 is presumably all-zero (set up outside this
   chunk), which makes a non-zero %eax mean "some byte of these 64 is
   NUL" -- confirm.
   On a non-zero mask control returns to L(Shl9Start), which re-examines
   the data 16 bytes at a time; %xmm0, %xmm1 and %xmm2 are still
   unmodified at that point.  Otherwise the four blocks are spliced with
   palignr $9 and stored to (%edx) .. 48(%edx).  */
L(Shl9LoopStart):
|
|
movaps 7(%ecx), %xmm2
|
|
movaps 23(%ecx), %xmm3
|
|
movaps %xmm3, %xmm6
|
|
movaps 39(%ecx), %xmm4
|
|
movaps %xmm4, %xmm7
|
|
movaps 55(%ecx), %xmm5
|
|
/* %xmm7 = byte-wise min of all four blocks.  */
pminub %xmm2, %xmm6
|
|
pminub %xmm5, %xmm7
|
|
pminub %xmm6, %xmm7
|
|
pcmpeqb %xmm0, %xmm7
|
|
pmovmskb %xmm7, %eax
|
|
/* Save the unshifted last block; it becomes %xmm1 for the next round.  */
movaps %xmm5, %xmm7
|
|
palignr $9, %xmm4, %xmm5
|
|
/* The palignr between test and jnz does not modify the flags.  */
test %eax, %eax
|
|
palignr $9, %xmm3, %xmm4
|
|
jnz L(Shl9Start)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $64, %ebx
|
|
jbe L(StrncpyLeave9)
|
|
# endif
|
|
palignr $9, %xmm2, %xmm3
|
|
lea 64(%ecx), %ecx
|
|
palignr $9, %xmm1, %xmm2
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
movaps %xmm4, 32(%edx)
|
|
movaps %xmm3, 16(%edx)
|
|
movaps %xmm2, (%edx)
|
|
lea 64(%edx), %edx
|
|
jmp L(Shl9LoopStart)
|
|
|
|
/* Exit from the shift-by-9 path with a non-zero match mask in %eax.
   The top 7 bytes of the previous source block (%xmm1) have not been
   stored yet: merge them into the first 7 bytes of the 16-byte block at
   (%edx), writing its bytes 7..15 back unchanged.  %esi = 7 tells
   L(CopyFrom1To16Bytes) how far to advance %edx and %ecx before it
   copies the final 1..16 bytes selected by the mask.  */
L(Shl9LoopExit):
|
|
movaps (%edx), %xmm6
|
|
/* Drop destination bytes 0..6; bytes 7..15 move down to 0..8.  */
psrldq $7, %xmm6
|
|
mov $7, %esi
|
|
/* %xmm6 = { %xmm1 bytes 9..15, old destination bytes 7..15 }.  */
palignr $9, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx)
|
|
jmp L(CopyFrom1To16Bytes)
|
|
|
|
.p2align 4
|
|
L(Shl10):
|
|
movaps -10(%ecx), %xmm1
|
|
movaps 6(%ecx), %xmm2
|
|
L(Shl10Start):
|
|
pcmpeqb %xmm2, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit10Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl10LoopExit)
|
|
|
|
palignr $10, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 22(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit10Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl10LoopExit)
|
|
|
|
palignr $10, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 22(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit10Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl10LoopExit)
|
|
|
|
palignr $10, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 22(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit10Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl10LoopExit)
|
|
|
|
palignr $10, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
lea 22(%ecx), %ecx
|
|
lea 16(%edx), %edx
|
|
|
|
mov %ecx, %eax
|
|
and $-0x40, %ecx
|
|
sub %ecx, %eax
|
|
lea -6(%ecx), %ecx
|
|
sub %eax, %edx
|
|
# ifdef USE_AS_STRNCPY
|
|
add %eax, %ebx
|
|
# endif
|
|
movaps -10(%ecx), %xmm1
|
|
|
|
L(Shl10LoopStart):
|
|
movaps 6(%ecx), %xmm2
|
|
movaps 22(%ecx), %xmm3
|
|
movaps %xmm3, %xmm6
|
|
movaps 38(%ecx), %xmm4
|
|
movaps %xmm4, %xmm7
|
|
movaps 54(%ecx), %xmm5
|
|
pminub %xmm2, %xmm6
|
|
pminub %xmm5, %xmm7
|
|
pminub %xmm6, %xmm7
|
|
pcmpeqb %xmm0, %xmm7
|
|
pmovmskb %xmm7, %eax
|
|
movaps %xmm5, %xmm7
|
|
palignr $10, %xmm4, %xmm5
|
|
test %eax, %eax
|
|
palignr $10, %xmm3, %xmm4
|
|
jnz L(Shl10Start)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $64, %ebx
|
|
jbe L(StrncpyLeave10)
|
|
# endif
|
|
palignr $10, %xmm2, %xmm3
|
|
lea 64(%ecx), %ecx
|
|
palignr $10, %xmm1, %xmm2
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
movaps %xmm4, 32(%edx)
|
|
movaps %xmm3, 16(%edx)
|
|
movaps %xmm2, (%edx)
|
|
lea 64(%edx), %edx
|
|
jmp L(Shl10LoopStart)
|
|
|
|
/* Exit from the shift-by-10 path with a non-zero match mask in %eax.
   The top 6 bytes of the previous source block (%xmm1) are merged into
   the first 6 bytes of the 16-byte block at (%edx); its bytes 6..15 are
   written back unchanged.  %esi = 6 is the amount by which
   L(CopyFrom1To16Bytes) advances %edx and %ecx.  */
L(Shl10LoopExit):
|
|
movaps (%edx), %xmm6
|
|
/* Drop destination bytes 0..5; bytes 6..15 move down to 0..9.  */
psrldq $6, %xmm6
|
|
mov $6, %esi
|
|
/* %xmm6 = { %xmm1 bytes 10..15, old destination bytes 6..15 }.  */
palignr $10, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx)
|
|
jmp L(CopyFrom1To16Bytes)
|
|
|
|
.p2align 4
|
|
L(Shl11):
|
|
movaps -11(%ecx), %xmm1
|
|
movaps 5(%ecx), %xmm2
|
|
L(Shl11Start):
|
|
pcmpeqb %xmm2, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit11Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl11LoopExit)
|
|
|
|
palignr $11, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 21(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit11Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl11LoopExit)
|
|
|
|
palignr $11, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 21(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit11Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl11LoopExit)
|
|
|
|
palignr $11, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 21(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit11Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl11LoopExit)
|
|
|
|
palignr $11, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
lea 21(%ecx), %ecx
|
|
lea 16(%edx), %edx
|
|
|
|
mov %ecx, %eax
|
|
and $-0x40, %ecx
|
|
sub %ecx, %eax
|
|
lea -5(%ecx), %ecx
|
|
sub %eax, %edx
|
|
# ifdef USE_AS_STRNCPY
|
|
add %eax, %ebx
|
|
# endif
|
|
movaps -11(%ecx), %xmm1
|
|
|
|
L(Shl11LoopStart):
|
|
movaps 5(%ecx), %xmm2
|
|
movaps 21(%ecx), %xmm3
|
|
movaps %xmm3, %xmm6
|
|
movaps 37(%ecx), %xmm4
|
|
movaps %xmm4, %xmm7
|
|
movaps 53(%ecx), %xmm5
|
|
pminub %xmm2, %xmm6
|
|
pminub %xmm5, %xmm7
|
|
pminub %xmm6, %xmm7
|
|
pcmpeqb %xmm0, %xmm7
|
|
pmovmskb %xmm7, %eax
|
|
movaps %xmm5, %xmm7
|
|
palignr $11, %xmm4, %xmm5
|
|
test %eax, %eax
|
|
palignr $11, %xmm3, %xmm4
|
|
jnz L(Shl11Start)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $64, %ebx
|
|
jbe L(StrncpyLeave11)
|
|
# endif
|
|
palignr $11, %xmm2, %xmm3
|
|
lea 64(%ecx), %ecx
|
|
palignr $11, %xmm1, %xmm2
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
movaps %xmm4, 32(%edx)
|
|
movaps %xmm3, 16(%edx)
|
|
movaps %xmm2, (%edx)
|
|
lea 64(%edx), %edx
|
|
jmp L(Shl11LoopStart)
|
|
|
|
/* Exit from the shift-by-11 path with a non-zero match mask in %eax.
   The top 5 bytes of the previous source block (%xmm1) are merged into
   the first 5 bytes of the 16-byte block at (%edx); its bytes 5..15 are
   written back unchanged.  %esi = 5 is the amount by which
   L(CopyFrom1To16Bytes) advances %edx and %ecx.  */
L(Shl11LoopExit):
|
|
movaps (%edx), %xmm6
|
|
/* Drop destination bytes 0..4; bytes 5..15 move down to 0..10.  */
psrldq $5, %xmm6
|
|
mov $5, %esi
|
|
/* %xmm6 = { %xmm1 bytes 11..15, old destination bytes 5..15 }.  */
palignr $11, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx)
|
|
jmp L(CopyFrom1To16Bytes)
|
|
|
|
.p2align 4
|
|
L(Shl12):
|
|
movaps -12(%ecx), %xmm1
|
|
movaps 4(%ecx), %xmm2
|
|
L(Shl12Start):
|
|
pcmpeqb %xmm2, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit12Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl12LoopExit)
|
|
|
|
palignr $12, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 20(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit12Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl12LoopExit)
|
|
|
|
palignr $12, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 20(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit12Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl12LoopExit)
|
|
|
|
palignr $12, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 20(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit12Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl12LoopExit)
|
|
|
|
palignr $12, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
lea 20(%ecx), %ecx
|
|
lea 16(%edx), %edx
|
|
|
|
mov %ecx, %eax
|
|
and $-0x40, %ecx
|
|
sub %ecx, %eax
|
|
lea -4(%ecx), %ecx
|
|
sub %eax, %edx
|
|
# ifdef USE_AS_STRNCPY
|
|
add %eax, %ebx
|
|
# endif
|
|
movaps -12(%ecx), %xmm1
|
|
|
|
L(Shl12LoopStart):
|
|
movaps 4(%ecx), %xmm2
|
|
movaps 20(%ecx), %xmm3
|
|
movaps %xmm3, %xmm6
|
|
movaps 36(%ecx), %xmm4
|
|
movaps %xmm4, %xmm7
|
|
movaps 52(%ecx), %xmm5
|
|
pminub %xmm2, %xmm6
|
|
pminub %xmm5, %xmm7
|
|
pminub %xmm6, %xmm7
|
|
pcmpeqb %xmm0, %xmm7
|
|
pmovmskb %xmm7, %eax
|
|
movaps %xmm5, %xmm7
|
|
palignr $12, %xmm4, %xmm5
|
|
test %eax, %eax
|
|
palignr $12, %xmm3, %xmm4
|
|
jnz L(Shl12Start)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $64, %ebx
|
|
jbe L(StrncpyLeave12)
|
|
# endif
|
|
palignr $12, %xmm2, %xmm3
|
|
lea 64(%ecx), %ecx
|
|
palignr $12, %xmm1, %xmm2
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
movaps %xmm4, 32(%edx)
|
|
movaps %xmm3, 16(%edx)
|
|
movaps %xmm2, (%edx)
|
|
lea 64(%edx), %edx
|
|
jmp L(Shl12LoopStart)
|
|
|
|
/* Exit from the shift-by-12 path with a non-zero match mask in %eax.
   The top 4 bytes of the previous source block (%xmm1) are merged into
   the first 4 bytes of the 16-byte block at (%edx); its bytes 4..15 are
   written back unchanged.  %esi = 4 is the amount by which
   L(CopyFrom1To16Bytes) advances %edx and %ecx.  */
L(Shl12LoopExit):
|
|
movaps (%edx), %xmm6
|
|
/* Drop destination bytes 0..3; bytes 4..15 move down to 0..11.  */
psrldq $4, %xmm6
|
|
mov $4, %esi
|
|
/* %xmm6 = { %xmm1 bytes 12..15, old destination bytes 4..15 }.  */
palignr $12, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx)
|
|
jmp L(CopyFrom1To16Bytes)
|
|
|
|
.p2align 4
|
|
L(Shl13):
|
|
movaps -13(%ecx), %xmm1
|
|
movaps 3(%ecx), %xmm2
|
|
L(Shl13Start):
|
|
pcmpeqb %xmm2, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit13Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl13LoopExit)
|
|
|
|
palignr $13, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 19(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit13Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl13LoopExit)
|
|
|
|
palignr $13, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 19(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit13Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl13LoopExit)
|
|
|
|
palignr $13, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 19(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit13Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl13LoopExit)
|
|
|
|
palignr $13, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
lea 19(%ecx), %ecx
|
|
lea 16(%edx), %edx
|
|
|
|
mov %ecx, %eax
|
|
and $-0x40, %ecx
|
|
sub %ecx, %eax
|
|
lea -3(%ecx), %ecx
|
|
sub %eax, %edx
|
|
# ifdef USE_AS_STRNCPY
|
|
add %eax, %ebx
|
|
# endif
|
|
movaps -13(%ecx), %xmm1
|
|
|
|
L(Shl13LoopStart):
|
|
movaps 3(%ecx), %xmm2
|
|
movaps 19(%ecx), %xmm3
|
|
movaps %xmm3, %xmm6
|
|
movaps 35(%ecx), %xmm4
|
|
movaps %xmm4, %xmm7
|
|
movaps 51(%ecx), %xmm5
|
|
pminub %xmm2, %xmm6
|
|
pminub %xmm5, %xmm7
|
|
pminub %xmm6, %xmm7
|
|
pcmpeqb %xmm0, %xmm7
|
|
pmovmskb %xmm7, %eax
|
|
movaps %xmm5, %xmm7
|
|
palignr $13, %xmm4, %xmm5
|
|
test %eax, %eax
|
|
palignr $13, %xmm3, %xmm4
|
|
jnz L(Shl13Start)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $64, %ebx
|
|
jbe L(StrncpyLeave13)
|
|
# endif
|
|
palignr $13, %xmm2, %xmm3
|
|
lea 64(%ecx), %ecx
|
|
palignr $13, %xmm1, %xmm2
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
movaps %xmm4, 32(%edx)
|
|
movaps %xmm3, 16(%edx)
|
|
movaps %xmm2, (%edx)
|
|
lea 64(%edx), %edx
|
|
jmp L(Shl13LoopStart)
|
|
|
|
/* Exit from the shift-by-13 path with a non-zero match mask in %eax.
   The top 3 bytes of the previous source block (%xmm1) are merged into
   the first 3 bytes of the 16-byte block at (%edx); its bytes 3..15 are
   written back unchanged.  %esi = 3 is the amount by which
   L(CopyFrom1To16Bytes) advances %edx and %ecx.  */
L(Shl13LoopExit):
|
|
movaps (%edx), %xmm6
|
|
/* Drop destination bytes 0..2; bytes 3..15 move down to 0..12.  */
psrldq $3, %xmm6
|
|
mov $3, %esi
|
|
/* %xmm6 = { %xmm1 bytes 13..15, old destination bytes 3..15 }.  */
palignr $13, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx)
|
|
jmp L(CopyFrom1To16Bytes)
|
|
|
|
.p2align 4
|
|
L(Shl14):
|
|
movaps -14(%ecx), %xmm1
|
|
movaps 2(%ecx), %xmm2
|
|
L(Shl14Start):
|
|
pcmpeqb %xmm2, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit14Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl14LoopExit)
|
|
|
|
palignr $14, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 18(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit14Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl14LoopExit)
|
|
|
|
palignr $14, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 18(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit14Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl14LoopExit)
|
|
|
|
palignr $14, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 18(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit14Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl14LoopExit)
|
|
|
|
palignr $14, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
lea 18(%ecx), %ecx
|
|
lea 16(%edx), %edx
|
|
|
|
mov %ecx, %eax
|
|
and $-0x40, %ecx
|
|
sub %ecx, %eax
|
|
lea -2(%ecx), %ecx
|
|
sub %eax, %edx
|
|
# ifdef USE_AS_STRNCPY
|
|
add %eax, %ebx
|
|
# endif
|
|
movaps -14(%ecx), %xmm1
|
|
|
|
L(Shl14LoopStart):
|
|
movaps 2(%ecx), %xmm2
|
|
movaps 18(%ecx), %xmm3
|
|
movaps %xmm3, %xmm6
|
|
movaps 34(%ecx), %xmm4
|
|
movaps %xmm4, %xmm7
|
|
movaps 50(%ecx), %xmm5
|
|
pminub %xmm2, %xmm6
|
|
pminub %xmm5, %xmm7
|
|
pminub %xmm6, %xmm7
|
|
pcmpeqb %xmm0, %xmm7
|
|
pmovmskb %xmm7, %eax
|
|
movaps %xmm5, %xmm7
|
|
palignr $14, %xmm4, %xmm5
|
|
test %eax, %eax
|
|
palignr $14, %xmm3, %xmm4
|
|
jnz L(Shl14Start)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $64, %ebx
|
|
jbe L(StrncpyLeave14)
|
|
# endif
|
|
palignr $14, %xmm2, %xmm3
|
|
lea 64(%ecx), %ecx
|
|
palignr $14, %xmm1, %xmm2
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
movaps %xmm4, 32(%edx)
|
|
movaps %xmm3, 16(%edx)
|
|
movaps %xmm2, (%edx)
|
|
lea 64(%edx), %edx
|
|
jmp L(Shl14LoopStart)
|
|
|
|
/* Exit from the shift-by-14 path with a non-zero match mask in %eax.
   The top 2 bytes of the previous source block (%xmm1) are merged into
   the first 2 bytes of the 16-byte block at (%edx); its bytes 2..15 are
   written back unchanged.  %esi = 2 is the amount by which
   L(CopyFrom1To16Bytes) advances %edx and %ecx.  */
L(Shl14LoopExit):
|
|
movaps (%edx), %xmm6
|
|
/* Drop destination bytes 0..1; bytes 2..15 move down to 0..13.  */
psrldq $2, %xmm6
|
|
mov $2, %esi
|
|
/* %xmm6 = { %xmm1 bytes 14..15, old destination bytes 2..15 }.  */
palignr $14, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx)
|
|
jmp L(CopyFrom1To16Bytes)
|
|
|
|
.p2align 4
|
|
L(Shl15):
|
|
movaps -15(%ecx), %xmm1
|
|
movaps 1(%ecx), %xmm2
|
|
L(Shl15Start):
|
|
pcmpeqb %xmm2, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit15Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl15LoopExit)
|
|
|
|
palignr $15, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 17(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit15Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl15LoopExit)
|
|
|
|
palignr $15, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 17(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit15Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl15LoopExit)
|
|
|
|
palignr $15, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 17(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit15Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl15LoopExit)
|
|
|
|
palignr $15, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
lea 17(%ecx), %ecx
|
|
lea 16(%edx), %edx
|
|
|
|
mov %ecx, %eax
|
|
and $-0x40, %ecx
|
|
sub %ecx, %eax
|
|
lea -1(%ecx), %ecx
|
|
sub %eax, %edx
|
|
# ifdef USE_AS_STRNCPY
|
|
add %eax, %ebx
|
|
# endif
|
|
movaps -15(%ecx), %xmm1
|
|
|
|
L(Shl15LoopStart):
|
|
movaps 1(%ecx), %xmm2
|
|
movaps 17(%ecx), %xmm3
|
|
movaps %xmm3, %xmm6
|
|
movaps 33(%ecx), %xmm4
|
|
movaps %xmm4, %xmm7
|
|
movaps 49(%ecx), %xmm5
|
|
pminub %xmm2, %xmm6
|
|
pminub %xmm5, %xmm7
|
|
pminub %xmm6, %xmm7
|
|
pcmpeqb %xmm0, %xmm7
|
|
pmovmskb %xmm7, %eax
|
|
movaps %xmm5, %xmm7
|
|
palignr $15, %xmm4, %xmm5
|
|
test %eax, %eax
|
|
palignr $15, %xmm3, %xmm4
|
|
jnz L(Shl15Start)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $64, %ebx
|
|
jbe L(StrncpyLeave15)
|
|
# endif
|
|
palignr $15, %xmm2, %xmm3
|
|
lea 64(%ecx), %ecx
|
|
palignr $15, %xmm1, %xmm2
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
movaps %xmm4, 32(%edx)
|
|
movaps %xmm3, 16(%edx)
|
|
movaps %xmm2, (%edx)
|
|
lea 64(%edx), %edx
|
|
jmp L(Shl15LoopStart)
|
|
|
|
/* Exit from the shift-by-15 path with a non-zero match mask in %eax.
   The top byte of the previous source block (%xmm1) is merged into the
   first byte of the 16-byte block at (%edx); its bytes 1..15 are
   written back unchanged.  %esi = 1 is the amount by which
   L(CopyFrom1To16Bytes) advances %edx and %ecx.  */
L(Shl15LoopExit):
|
|
movaps (%edx), %xmm6
|
|
/* Drop destination byte 0; bytes 1..15 move down to 0..14.  */
psrldq $1, %xmm6
|
|
mov $1, %esi
|
|
/* %xmm6 = { %xmm1 byte 15, old destination bytes 1..15 }.  */
palignr $15, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx)
|
|
/* Only the strcat build needs an explicit jump: otherwise
   L(CopyFrom1To16Bytes) is defined right below and is reached by
   falling through.  */
# ifdef USE_AS_STRCAT
|
|
jmp L(CopyFrom1To16Bytes)
|
|
# endif
|
|
|
|
# ifndef USE_AS_STRCAT
|
|
|
|
.p2align 4
|
|
/* Common tail for the shifted copy paths.
   In:  %eax = non-zero 16-bit mask (produced by pcmpeqb/pmovmskb in the
	       visible callers),
	%esi = number of bytes the caller has just stored at (%edx),
	%ecx/%edx = source/destination before that adjustment.
   Advances %edx and %ecx by %esi, restores %esi, and dispatches on the
   lowest set bit k of the mask to L(ExitN) with N = k + 1, which copies
   N bytes.  Bits 0..6 are tested here; if only bit 7 of %al is set,
   control falls through into L(Exit8).  A zero %al goes to L(ExitHigh),
   which does the same for %ah.
   With USE_AS_STRNCPY, 16 is first added back to %ebx.  */
L(CopyFrom1To16Bytes):
|
|
# ifdef USE_AS_STRNCPY
|
|
add $16, %ebx
|
|
# endif
|
|
add %esi, %edx
|
|
add %esi, %ecx
|
|
|
|
/* NOTE(review): the matching push of %esi is outside this chunk.  */
POP (%esi)
|
|
test %al, %al
|
|
jz L(ExitHigh)
|
|
test $0x01, %al
|
|
jnz L(Exit1)
|
|
test $0x02, %al
|
|
jnz L(Exit2)
|
|
test $0x04, %al
|
|
jnz L(Exit3)
|
|
test $0x08, %al
|
|
jnz L(Exit4)
|
|
test $0x10, %al
|
|
jnz L(Exit5)
|
|
test $0x20, %al
|
|
jnz L(Exit6)
|
|
test $0x40, %al
|
|
jnz L(Exit7)
|
|
|
|
.p2align 4
|
|
/* Copy exactly 8 bytes from (%ecx) to (%edx) and finish.
   Return value in %eax: with USE_AS_STPCPY the address of the last byte
   copied (%edx + 7); otherwise the value held in %edi.
   NOTE(review): %edi is presumably the original destination pointer
   saved at function entry, which is outside this chunk -- confirm.
   With USE_AS_STRNCPY, 8 is subtracted from %ebx; if the result is
   non-zero control continues at L(StrncpyFillTailWithZero1) with %ecx
   pointing just past the copied bytes.  */
L(Exit8):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 7(%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $8, %ebx
|
|
/* lea leaves the flags alone, so jnz still tests the sub above.  */
lea 8(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero1)
|
|
# ifdef USE_AS_STPCPY
|
|
/* cmpb sets CF only when the last copied byte is 0; sbb $-1 then
   computes %eax + 1 - CF, i.e. %eax is bumped past the last byte unless
   that byte is NUL.  */
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
/* RETURN1 pops %edi (and %ebx in the strncpy build) and returns.  */
RETURN1
|
|
|
|
.p2align 4
|
|
/* Second half of the mask dispatch, entered from
   L(CopyFrom1To16Bytes) when %al is zero.  The lowest set bit k of %ah
   selects L(ExitN) with N = k + 9.  Bits 0..6 are tested; when none of
   them is set control falls through into L(Exit16).  */
L(ExitHigh):
|
|
test $0x01, %ah
|
|
jnz L(Exit9)
|
|
test $0x02, %ah
|
|
jnz L(Exit10)
|
|
test $0x04, %ah
|
|
jnz L(Exit11)
|
|
test $0x08, %ah
|
|
jnz L(Exit12)
|
|
test $0x10, %ah
|
|
jnz L(Exit13)
|
|
test $0x20, %ah
|
|
jnz L(Exit14)
|
|
test $0x40, %ah
|
|
jnz L(Exit15)
|
|
|
|
.p2align 4
|
|
/* Copy exactly 16 bytes from (%ecx) to (%edx), as two 8-byte moves, and
   finish.
   Return value in %eax: with USE_AS_STPCPY the address of the last byte
   copied (%edx + 15); otherwise the value held in %edi.
   NOTE(review): %edi is presumably the original destination pointer
   saved at function entry, which is outside this chunk -- confirm.
   With USE_AS_STRNCPY, 16 is subtracted from %ebx; if the result is
   non-zero control continues at L(StrncpyFillTailWithZero1) with %ecx
   pointing just past the copied bytes.  */
L(Exit16):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
movlpd 8(%ecx), %xmm0
|
|
movlpd %xmm0, 8(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 15(%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
/* lea leaves the flags alone, so jnz still tests the sub above.  */
lea 16(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero1)
|
|
# ifdef USE_AS_STPCPY
|
|
/* %eax += 1 unless the last copied byte is NUL (CF from cmpb).  */
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
/* RETURN1 pops %edi (and %ebx in the strncpy build) and returns.  */
RETURN1
|
|
|
|
# ifdef USE_AS_STRNCPY
|
|
|
|
CFI_PUSH(%esi)
|
|
|
|
.p2align 4
|
|
/* strncpy only.  Entered from L(CopyFrom1To16BytesCase2OrCase3) when
   the match mask in %eax is non-zero.  Adds 16 back to %ebx and advances
   %ecx and %edx by %esi, then copies min(%ebx, k + 1) bytes, where k is
   the lowest set bit of the mask: for N = 1, 2, ... the first L(ExitN)
   is taken for which either %ebx == N or mask bit N-1 is set.
   NOTE(review): assumes %ebx is in 1..16 after the add -- confirm
   against the callers outside this chunk.  */
L(CopyFrom1To16BytesCase2):
|
|
add $16, %ebx
|
|
add %esi, %ecx
|
|
/* %esi temporarily holds the advanced destination (%edx + %esi).  */
lea (%esi, %edx), %esi
|
|
/* Decide between the low (1..8) and high (9..16) dispatch without a
   branch per condition: bit 7 of %dh is bit 15 of (%ebx - 9), which is
   set when %ebx is below 9; OR-ing in %al makes %dh zero only when
   %ebx >= 9 and no mask bit 0..7 is set.  */
lea -9(%ebx), %edx
|
|
and $1<<7, %dh
|
|
or %al, %dh
|
|
test %dh, %dh
|
|
/* lea and pop do not touch the flags set by the test above.  */
lea (%esi), %edx
|
|
POP (%esi)
|
|
jz L(ExitHighCase2)
|
|
|
|
cmp $1, %ebx
|
|
je L(Exit1)
|
|
test $0x01, %al
|
|
jnz L(Exit1)
|
|
cmp $2, %ebx
|
|
je L(Exit2)
|
|
test $0x02, %al
|
|
jnz L(Exit2)
|
|
cmp $3, %ebx
|
|
je L(Exit3)
|
|
test $0x04, %al
|
|
jnz L(Exit3)
|
|
cmp $4, %ebx
|
|
je L(Exit4)
|
|
test $0x08, %al
|
|
jnz L(Exit4)
|
|
cmp $5, %ebx
|
|
je L(Exit5)
|
|
test $0x10, %al
|
|
jnz L(Exit5)
|
|
cmp $6, %ebx
|
|
je L(Exit6)
|
|
test $0x20, %al
|
|
jnz L(Exit6)
|
|
cmp $7, %ebx
|
|
je L(Exit7)
|
|
test $0x40, %al
|
|
jnz L(Exit7)
|
|
jmp L(Exit8)
|
|
|
|
.p2align 4
|
|
/* strncpy only.  High half of the L(CopyFrom1To16BytesCase2) dispatch,
   taken when %ebx >= 9 and %al is zero.  For N = 9, 10, ... the first
   L(ExitN) is taken for which either %ebx == N or bit N-9 of %ah is
   set; if none of 9..15 applies, L(Exit16) is used.  */
L(ExitHighCase2):
|
|
cmp $9, %ebx
|
|
je L(Exit9)
|
|
test $0x01, %ah
|
|
jnz L(Exit9)
|
|
cmp $10, %ebx
|
|
je L(Exit10)
|
|
test $0x02, %ah
|
|
jnz L(Exit10)
|
|
cmp $11, %ebx
|
|
je L(Exit11)
|
|
test $0x04, %ah
|
|
jnz L(Exit11)
|
|
cmp $12, %ebx
|
|
je L(Exit12)
|
|
test $0x8, %ah
|
|
jnz L(Exit12)
|
|
cmp $13, %ebx
|
|
je L(Exit13)
|
|
test $0x10, %ah
|
|
jnz L(Exit13)
|
|
cmp $14, %ebx
|
|
je L(Exit14)
|
|
test $0x20, %ah
|
|
jnz L(Exit14)
|
|
cmp $15, %ebx
|
|
je L(Exit15)
|
|
test $0x40, %ah
|
|
jnz L(Exit15)
|
|
jmp L(Exit16)
|
|
|
|
CFI_PUSH(%esi)
|
|
|
|
/* strncpy only.  Selects between the two "count runs out in this block"
   cases: a non-zero match mask in %eax goes to
   L(CopyFrom1To16BytesCase2); a zero mask falls through to Case3.  */
L(CopyFrom1To16BytesCase2OrCase3):
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16BytesCase2)
|
|
|
|
.p2align 4
|
|
/* Case3: no mask bit is set, so exactly %ebx bytes are copied.  Adds 16
   back to %ebx, advances %edx and %ecx by %esi, restores %esi and
   dispatches to L(ExitN) with N = %ebx through a small comparison tree.
   NOTE(review): assumes %ebx is in 1..16 after the add; the comparisons
   are signed and any value below 2 ends up in L(Exit1).  */
L(CopyFrom1To16BytesCase3):
|
|
add $16, %ebx
|
|
add %esi, %edx
|
|
add %esi, %ecx
|
|
|
|
POP (%esi)
|
|
cmp $16, %ebx
|
|
je L(Exit16)
|
|
cmp $8, %ebx
|
|
je L(Exit8)
|
|
jg L(More8Case3)
|
|
cmp $4, %ebx
|
|
je L(Exit4)
|
|
jg L(More4Case3)
|
|
cmp $2, %ebx
|
|
jl L(Exit1)
|
|
je L(Exit2)
|
|
jg L(Exit3)
|
|
L(More8Case3): /* but less than 16 */
|
|
cmp $12, %ebx
|
|
je L(Exit12)
|
|
jl L(Less12Case3)
|
|
cmp $14, %ebx
|
|
jl L(Exit13)
|
|
je L(Exit14)
|
|
jg L(Exit15)
|
|
L(More4Case3): /* but less than 8 */
|
|
cmp $6, %ebx
|
|
jl L(Exit5)
|
|
je L(Exit6)
|
|
jg L(Exit7)
|
|
L(Less12Case3): /* but more than 8 */
|
|
cmp $10, %ebx
|
|
jl L(Exit9)
|
|
je L(Exit10)
|
|
jg L(Exit11)
|
|
# endif
|
|
|
|
.p2align 4
|
|
/* Copy exactly 1 byte from (%ecx) to (%edx) and finish.
   Return value in %eax: with USE_AS_STPCPY the address of that byte
   (%edx); otherwise the value held in %edi.
   NOTE(review): %edi is presumably the original destination pointer
   saved at function entry, which is outside this chunk -- confirm.
   With USE_AS_STRNCPY, 1 is subtracted from %ebx; if the result is
   non-zero control continues at L(StrncpyFillTailWithZero1) with %ecx
   pointing just past the copied byte.  */
L(Exit1):
|
|
movb (%ecx), %al
|
|
movb %al, (%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea (%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $1, %ebx
|
|
/* lea leaves the flags alone, so jnz still tests the sub above.  */
lea 1(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero1)
|
|
# ifdef USE_AS_STPCPY
|
|
/* %eax += 1 unless the copied byte is NUL (CF from cmpb).  */
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
/* RETURN1 pops %edi (and %ebx in the strncpy build) and returns.  */
RETURN1
|
|
|
|
.p2align 4
|
|
L(Exit2):
|
|
movw (%ecx), %ax
|
|
movw %ax, (%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 1(%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $2, %ebx
|
|
lea 2(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero1)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN1
|
|
|
|
.p2align 4
|
|
L(Exit3):
|
|
movw (%ecx), %ax
|
|
movw %ax, (%edx)
|
|
movb 2(%ecx), %al
|
|
movb %al, 2(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 2(%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $3, %ebx
|
|
lea 3(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero1)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN1
|
|
|
|
.p2align 4
|
|
L(Exit4):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 3(%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $4, %ebx
|
|
lea 4(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero1)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN1
|
|
|
|
.p2align 4
|
|
L(Exit5):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
movb 4(%ecx), %al
|
|
movb %al, 4(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 4(%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $5, %ebx
|
|
lea 5(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero1)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN1
|
|
|
|
.p2align 4
|
|
L(Exit6):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
movw 4(%ecx), %ax
|
|
movw %ax, 4(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 5(%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $6, %ebx
|
|
lea 6(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero1)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN1
|
|
|
|
.p2align 4
|
|
/* Copy exactly 7 bytes from (%ecx) to (%edx) and finish.  Two 4-byte
   moves at offsets 0 and 3 are used; they overlap in byte 3.
   Return value in %eax: with USE_AS_STPCPY the address of the last byte
   copied (%edx + 6); otherwise the value held in %edi.
   NOTE(review): %edi is presumably the original destination pointer
   saved at function entry, which is outside this chunk -- confirm.
   With USE_AS_STRNCPY, 7 is subtracted from %ebx; if the result is
   non-zero control continues at L(StrncpyFillTailWithZero1) with %ecx
   pointing just past the copied bytes.  */
L(Exit7):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
movl 3(%ecx), %eax
|
|
movl %eax, 3(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 6(%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $7, %ebx
|
|
/* lea leaves the flags alone, so jnz still tests the sub above.  */
lea 7(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero1)
|
|
# ifdef USE_AS_STPCPY
|
|
/* %eax += 1 unless the last copied byte is NUL (CF from cmpb).  */
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
/* RETURN1 pops %edi (and %ebx in the strncpy build) and returns.  */
RETURN1
|
|
|
|
.p2align 4
|
|
L(Exit9):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
movb 8(%ecx), %al
|
|
movb %al, 8(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 8(%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $9, %ebx
|
|
lea 9(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero1)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN1
|
|
|
|
.p2align 4
|
|
L(Exit10):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
movw 8(%ecx), %ax
|
|
movw %ax, 8(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 9(%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $10, %ebx
|
|
lea 10(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero1)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN1
|
|
|
|
.p2align 4
|
|
L(Exit11):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
movl 7(%ecx), %eax
|
|
movl %eax, 7(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 10(%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $11, %ebx
|
|
lea 11(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero1)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN1
|
|
|
|
.p2align 4
|
|
L(Exit12):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
movl 8(%ecx), %eax
|
|
movl %eax, 8(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 11(%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $12, %ebx
|
|
lea 12(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero1)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN1
|
|
|
|
.p2align 4
|
|
L(Exit13):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
movlpd 5(%ecx), %xmm0
|
|
movlpd %xmm0, 5(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 12(%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $13, %ebx
|
|
lea 13(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero1)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN1
|
|
|
|
.p2align 4
|
|
L(Exit14):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
movlpd 6(%ecx), %xmm0
|
|
movlpd %xmm0, 6(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 13(%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $14, %ebx
|
|
lea 14(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero1)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN1
|
|
|
|
.p2align 4
|
|
L(Exit15):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
movlpd 7(%ecx), %xmm0
|
|
movlpd %xmm0, 7(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 14(%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $15, %ebx
|
|
lea 15(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero1)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN1
|
|
|
|
CFI_POP (%edi)
|
|
|
|
# ifdef USE_AS_STRNCPY
|
|
.p2align 4
|
|
L(Fill0):
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Fill1):
|
|
movb %dl, (%ecx)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Fill2):
|
|
movw %dx, (%ecx)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Fill3):
|
|
movw %dx, (%ecx)
|
|
movb %dl, 2(%ecx)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Fill4):
|
|
movl %edx, (%ecx)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Fill5):
|
|
movl %edx, (%ecx)
|
|
movb %dl, 4(%ecx)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Fill6):
|
|
movl %edx, (%ecx)
|
|
movw %dx, 4(%ecx)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Store 7 bytes from %edx at (%ecx) using two 4-byte moves that overlap
   in byte 3, then return.  %edx is zeroed in
   L(StrncpyFillTailWithZero) before L(FillFrom1To16Bytes) dispatches
   here, so this writes 7 zero bytes.  */
L(Fill7):
|
|
movl %edx, (%ecx)
|
|
movl %edx, 3(%ecx)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Fill8):
|
|
movlpd %xmm0, (%ecx)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Fill9):
|
|
movlpd %xmm0, (%ecx)
|
|
movb %dl, 8(%ecx)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Fill10):
|
|
movlpd %xmm0, (%ecx)
|
|
movw %dx, 8(%ecx)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Fill11):
|
|
movlpd %xmm0, (%ecx)
|
|
movl %edx, 7(%ecx)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Fill12):
|
|
movlpd %xmm0, (%ecx)
|
|
movl %edx, 8(%ecx)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Store 13 bytes from the low half of %xmm0 at (%ecx) using two 8-byte
   moves at offsets 0 and 5 (overlapping in bytes 5..7), then return.
   %xmm0 is zeroed in L(StrncpyFillTailWithZero) before
   L(FillFrom1To16Bytes) dispatches here, so this writes 13 zero
   bytes.  */
L(Fill13):
|
|
movlpd %xmm0, (%ecx)
|
|
movlpd %xmm0, 5(%ecx)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Fill14):
|
|
movlpd %xmm0, (%ecx)
|
|
movlpd %xmm0, 6(%ecx)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Fill15):
|
|
movlpd %xmm0, (%ecx)
|
|
movlpd %xmm0, 7(%ecx)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Fill16):
|
|
movlpd %xmm0, (%ecx)
|
|
movlpd %xmm0, 8(%ecx)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Entry used by L(StrncpyFillTailWithZero) after a subtraction from
   %ebx came out at or below zero: add 16 back and fall into the
   dispatcher below.  */
L(StrncpyFillExit1):
|
|
lea 16(%ebx), %ebx
|
|
/* Dispatch to L(FillN) with N = %ebx, which stores N bytes at (%ecx)
   and returns; %ebx == 0 goes to L(Fill0), which only returns.
   NOTE(review): assumes %ebx is in 0..16 here; the comparisons are
   signed and any non-zero value below 2 ends up in L(Fill1).  */
L(FillFrom1To16Bytes):
|
|
test %ebx, %ebx
|
|
jz L(Fill0)
|
|
cmp $16, %ebx
|
|
je L(Fill16)
|
|
cmp $8, %ebx
|
|
je L(Fill8)
|
|
jg L(FillMore8)
|
|
cmp $4, %ebx
|
|
je L(Fill4)
|
|
jg L(FillMore4)
|
|
cmp $2, %ebx
|
|
jl L(Fill1)
|
|
je L(Fill2)
|
|
jg L(Fill3)
|
|
L(FillMore8): /* but less than 16 */
|
|
cmp $12, %ebx
|
|
je L(Fill12)
|
|
jl L(FillLess12)
|
|
cmp $14, %ebx
|
|
jl L(Fill13)
|
|
je L(Fill14)
|
|
jg L(Fill15)
|
|
L(FillMore4): /* but less than 8 */
|
|
cmp $6, %ebx
|
|
jl L(Fill5)
|
|
je L(Fill6)
|
|
jg L(Fill7)
|
|
L(FillLess12): /* but more than 8 */
|
|
cmp $10, %ebx
|
|
jl L(Fill9)
|
|
je L(Fill10)
|
|
jmp L(Fill11)
|
|
|
|
CFI_PUSH (%edi)
|
|
|
|
.p2align 4
|
|
/* strncpy zero padding.
   In:  %ecx = first byte to clear, %ebx = number of bytes to clear.
   The "1" entry is used by the L(ExitN) blocks, which still have %edi
   on the stack; it pops %edi first.  The plain entry is used by the
   L(ExitTailN) blocks.
   Up to 16 bytes are handled by L(FillFrom1To16Bytes) directly.
   Otherwise 16 bytes are cleared with unaligned stores, %ecx is rounded
   down to a 16-byte boundary (with %ebx corrected for the overlap), and
   the rest is cleared with aligned movdqa stores, 64 bytes per loop
   iteration, before the final 0..16 bytes go to the Fill dispatcher.  */
L(StrncpyFillTailWithZero1):
|
|
POP (%edi)
|
|
L(StrncpyFillTailWithZero):
|
|
/* %xmm0 and %edx are the zero sources used by the L(FillN) blocks.  */
pxor %xmm0, %xmm0
|
|
xor %edx, %edx
|
|
sub $16, %ebx
|
|
jbe L(StrncpyFillExit1)
|
|
|
|
movlpd %xmm0, (%ecx)
|
|
movlpd %xmm0, 8(%ecx)
|
|
|
|
lea 16(%ecx), %ecx
|
|
|
|
/* Align %ecx down to 16 and give the skipped-back bytes to %ebx.  */
mov %ecx, %edx
|
|
and $0xf, %edx
|
|
sub %edx, %ecx
|
|
add %edx, %ebx
|
|
/* %edx was used as scratch above; make it zero again for L(FillN).  */
xor %edx, %edx
|
|
sub $64, %ebx
|
|
jb L(StrncpyFillLess64)
|
|
|
|
/* Clear 64 bytes per iteration while the count stays non-negative.  */
L(StrncpyFillLoopMovdqa):
|
|
movdqa %xmm0, (%ecx)
|
|
movdqa %xmm0, 16(%ecx)
|
|
movdqa %xmm0, 32(%ecx)
|
|
movdqa %xmm0, 48(%ecx)
|
|
lea 64(%ecx), %ecx
|
|
sub $64, %ebx
|
|
jae L(StrncpyFillLoopMovdqa)
|
|
|
|
/* Here %ebx = (bytes left) - 64, i.e. fewer than 64 bytes remain.  */
L(StrncpyFillLess64):
|
|
add $32, %ebx
|
|
jl L(StrncpyFillLess32)
|
|
/* At least 32 bytes remain: clear 32, then possibly 16 more.  */
movdqa %xmm0, (%ecx)
|
|
movdqa %xmm0, 16(%ecx)
|
|
lea 32(%ecx), %ecx
|
|
sub $16, %ebx
|
|
jl L(StrncpyFillExit1)
|
|
movdqa %xmm0, (%ecx)
|
|
lea 16(%ecx), %ecx
|
|
jmp L(FillFrom1To16Bytes)
|
|
|
|
/* Here %ebx = (bytes left) - 32, i.e. fewer than 32 bytes remain.  */
L(StrncpyFillLess32):
|
|
add $16, %ebx
|
|
jl L(StrncpyFillExit1)
|
|
movdqa %xmm0, (%ecx)
|
|
lea 16(%ecx), %ecx
|
|
jmp L(FillFrom1To16Bytes)
|
|
# endif
|
|
|
|
.p2align 4
|
|
/* Copy exactly 1 byte from (%ecx) to (%edx) and finish.  Unlike
   L(Exit1), this variant takes the non-stpcpy return value from %edx,
   returns with RETURN (no %edi pop) and, for strncpy, continues at
   L(StrncpyFillTailWithZero) rather than the "1" entry.
   Return value in %eax: %edx in both builds (for USE_AS_STPCPY this is
   the address of the byte copied).
   With USE_AS_STRNCPY, 1 is subtracted from %ebx; if the result is
   non-zero the remaining bytes starting at %edx + 1 are zero-filled.  */
L(ExitTail1):
|
|
movb (%ecx), %al
|
|
movb %al, (%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea (%edx), %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $1, %ebx
|
|
/* lea leaves the flags alone, so jnz still tests the sub above.  */
lea 1(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero)
|
|
# ifdef USE_AS_STPCPY
|
|
/* %eax += 1 unless the copied byte is NUL (CF from cmpb).  */
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(ExitTail2):
|
|
movw (%ecx), %ax
|
|
movw %ax, (%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 1(%edx), %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $2, %ebx
|
|
lea 2(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(ExitTail3):
|
|
movw (%ecx), %ax
|
|
movw %ax, (%edx)
|
|
movb 2(%ecx), %al
|
|
movb %al, 2(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 2(%edx), %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $3, %ebx
|
|
lea 3(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN
|
|
|
|
/* L(ExitTail4): copy exactly 4 bytes from (%ecx) to (%edx) with one 32-bit
   move (using %eax as scratch before it receives the return value) and
   return.  %eax = %edx, or for USE_AS_STPCPY the address of the last byte
   copied, 3(%edx).  USE_AS_STRNCPY: %ebx -= 4; if the result is non-zero,
   jump to L(StrncpyFillTailWithZero) with %ecx = 4(%edx).  */
.p2align 4
|
|
L(ExitTail4):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 3(%edx), %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $4, %ebx
|
|
/* lea does not modify EFLAGS, so jnz still tests the result of the sub.  */
lea 4(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero)
|
|
# ifdef USE_AS_STPCPY
|
|
/* CF is set only when the byte at (%eax) is 0; sbb $-1 adds 1 - CF, so
   %eax steps past the last byte copied unless that byte is NUL.  */
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN
|
|
|
|
/* L(ExitTail5): copy exactly 5 bytes from (%ecx) to (%edx) (a 32-bit move
   plus a byte move at offset 4; %eax/%al is scratch until the return value
   is set) and return.  %eax = %edx, or for USE_AS_STPCPY the address of the
   last byte copied, 4(%edx).  USE_AS_STRNCPY: %ebx -= 5; if the result is
   non-zero, jump to L(StrncpyFillTailWithZero) with %ecx = 5(%edx).  */
.p2align 4
|
|
L(ExitTail5):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
movb 4(%ecx), %al
|
|
movb %al, 4(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 4(%edx), %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $5, %ebx
|
|
/* lea does not modify EFLAGS, so jnz still tests the result of the sub.  */
lea 5(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero)
|
|
# ifdef USE_AS_STPCPY
|
|
/* CF is set only when the byte at (%eax) is 0; sbb $-1 adds 1 - CF, so
   %eax steps past the last byte copied unless that byte is NUL.  */
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN
|
|
|
|
/* L(ExitTail6): copy exactly 6 bytes from (%ecx) to (%edx) (a 32-bit move
   plus a 16-bit move at offset 4; %eax/%ax is scratch until the return
   value is set) and return.  %eax = %edx, or for USE_AS_STPCPY the address
   of the last byte copied, 5(%edx).  USE_AS_STRNCPY: %ebx -= 6; if the
   result is non-zero, jump to L(StrncpyFillTailWithZero) with
   %ecx = 6(%edx).  */
.p2align 4
|
|
L(ExitTail6):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
movw 4(%ecx), %ax
|
|
movw %ax, 4(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 5(%edx), %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $6, %ebx
|
|
/* lea does not modify EFLAGS, so jnz still tests the result of the sub.  */
lea 6(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero)
|
|
# ifdef USE_AS_STPCPY
|
|
/* CF is set only when the byte at (%eax) is 0; sbb $-1 adds 1 - CF, so
   %eax steps past the last byte copied unless that byte is NUL.  */
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN
|
|
|
|
/* L(ExitTail7): copy exactly 7 bytes from (%ecx) to (%edx) using two 32-bit
   moves at offsets 0 and 3 (they overlap by one byte), then return.
   %eax = %edx, or for USE_AS_STPCPY the address of the last byte copied,
   6(%edx).  USE_AS_STRNCPY: %ebx -= 7; if the result is non-zero, jump to
   L(StrncpyFillTailWithZero) with %ecx = 7(%edx).  */
.p2align 4
|
|
L(ExitTail7):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
movl 3(%ecx), %eax
|
|
movl %eax, 3(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 6(%edx), %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $7, %ebx
|
|
/* lea does not modify EFLAGS, so jnz still tests the result of the sub.  */
lea 7(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero)
|
|
# ifdef USE_AS_STPCPY
|
|
/* CF is set only when the byte at (%eax) is 0; sbb $-1 adds 1 - CF, so
   %eax steps past the last byte copied unless that byte is NUL.  */
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN
|
|
|
|
/* L(ExitTail8): copy exactly 8 bytes from (%ecx) to (%edx) with one 64-bit
   movlpd through %xmm0 and return.  %eax = %edx, or for USE_AS_STPCPY the
   address of the last byte copied, 7(%edx).  USE_AS_STRNCPY: %ebx -= 8; if
   the result is non-zero, jump to L(StrncpyFillTailWithZero) with
   %ecx = 8(%edx).  */
.p2align 4
|
|
L(ExitTail8):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 7(%edx), %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $8, %ebx
|
|
/* lea does not modify EFLAGS, so jnz still tests the result of the sub.  */
lea 8(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero)
|
|
# ifdef USE_AS_STPCPY
|
|
/* CF is set only when the byte at (%eax) is 0; sbb $-1 adds 1 - CF, so
   %eax steps past the last byte copied unless that byte is NUL.  */
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN
|
|
|
|
/* L(ExitTail9): copy exactly 9 bytes from (%ecx) to (%edx) (a 64-bit movlpd
   plus a byte move at offset 8) and return.  %eax = %edx, or for
   USE_AS_STPCPY the address of the last byte copied, 8(%edx).
   USE_AS_STRNCPY: %ebx -= 9; if the result is non-zero, jump to
   L(StrncpyFillTailWithZero) with %ecx = 9(%edx).  */
.p2align 4
|
|
L(ExitTail9):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
movb 8(%ecx), %al
|
|
movb %al, 8(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 8(%edx), %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $9, %ebx
|
|
/* lea does not modify EFLAGS, so jnz still tests the result of the sub.  */
lea 9(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero)
|
|
# ifdef USE_AS_STPCPY
|
|
/* CF is set only when the byte at (%eax) is 0; sbb $-1 adds 1 - CF, so
   %eax steps past the last byte copied unless that byte is NUL.  */
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN
|
|
|
|
/* L(ExitTail10): copy exactly 10 bytes from (%ecx) to (%edx) (a 64-bit
   movlpd plus a 16-bit move at offset 8) and return.  %eax = %edx, or for
   USE_AS_STPCPY the address of the last byte copied, 9(%edx).
   USE_AS_STRNCPY: %ebx -= 10; if the result is non-zero, jump to
   L(StrncpyFillTailWithZero) with %ecx = 10(%edx).  */
.p2align 4
|
|
L(ExitTail10):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
movw 8(%ecx), %ax
|
|
movw %ax, 8(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 9(%edx), %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $10, %ebx
|
|
/* lea does not modify EFLAGS, so jnz still tests the result of the sub.  */
lea 10(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero)
|
|
# ifdef USE_AS_STPCPY
|
|
/* CF is set only when the byte at (%eax) is 0; sbb $-1 adds 1 - CF, so
   %eax steps past the last byte copied unless that byte is NUL.  */
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN
|
|
|
|
/* L(ExitTail11): copy exactly 11 bytes from (%ecx) to (%edx) using a 64-bit
   movlpd at offset 0 and a 32-bit move at offset 7 (overlapping by one
   byte), then return.  %eax = %edx, or for USE_AS_STPCPY the address of the
   last byte copied, 10(%edx).  USE_AS_STRNCPY: %ebx -= 11; if the result is
   non-zero, jump to L(StrncpyFillTailWithZero) with %ecx = 11(%edx).  */
.p2align 4
|
|
L(ExitTail11):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
movl 7(%ecx), %eax
|
|
movl %eax, 7(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 10(%edx), %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $11, %ebx
|
|
/* lea does not modify EFLAGS, so jnz still tests the result of the sub.  */
lea 11(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero)
|
|
# ifdef USE_AS_STPCPY
|
|
/* CF is set only when the byte at (%eax) is 0; sbb $-1 adds 1 - CF, so
   %eax steps past the last byte copied unless that byte is NUL.  */
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN
|
|
|
|
/* L(ExitTail12): copy exactly 12 bytes from (%ecx) to (%edx) (a 64-bit
   movlpd plus a 32-bit move at offset 8) and return.  %eax = %edx, or for
   USE_AS_STPCPY the address of the last byte copied, 11(%edx).
   USE_AS_STRNCPY: %ebx -= 12; if the result is non-zero, jump to
   L(StrncpyFillTailWithZero) with %ecx = 12(%edx).  */
.p2align 4
|
|
L(ExitTail12):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
movl 8(%ecx), %eax
|
|
movl %eax, 8(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 11(%edx), %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $12, %ebx
|
|
/* lea does not modify EFLAGS, so jnz still tests the result of the sub.  */
lea 12(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero)
|
|
# ifdef USE_AS_STPCPY
|
|
/* CF is set only when the byte at (%eax) is 0; sbb $-1 adds 1 - CF, so
   %eax steps past the last byte copied unless that byte is NUL.  */
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN
|
|
|
|
/* L(ExitTail13): copy exactly 13 bytes from (%ecx) to (%edx) using two
   64-bit movlpd moves at offsets 0 and 5 (overlapping by three bytes), then
   return.  %eax = %edx, or for USE_AS_STPCPY the address of the last byte
   copied, 12(%edx).  USE_AS_STRNCPY: %ebx -= 13; if the result is non-zero,
   jump to L(StrncpyFillTailWithZero) with %ecx = 13(%edx).  */
.p2align 4
|
|
L(ExitTail13):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
movlpd 5(%ecx), %xmm0
|
|
movlpd %xmm0, 5(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 12(%edx), %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $13, %ebx
|
|
/* lea does not modify EFLAGS, so jnz still tests the result of the sub.  */
lea 13(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero)
|
|
# ifdef USE_AS_STPCPY
|
|
/* CF is set only when the byte at (%eax) is 0; sbb $-1 adds 1 - CF, so
   %eax steps past the last byte copied unless that byte is NUL.  */
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN
|
|
|
|
/* L(ExitTail14): copy exactly 14 bytes from (%ecx) to (%edx) using two
   64-bit movlpd moves at offsets 0 and 6 (overlapping by two bytes), then
   return.  %eax = %edx, or for USE_AS_STPCPY the address of the last byte
   copied, 13(%edx).  USE_AS_STRNCPY: %ebx -= 14; if the result is non-zero,
   jump to L(StrncpyFillTailWithZero) with %ecx = 14(%edx).  */
.p2align 4
|
|
L(ExitTail14):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
movlpd 6(%ecx), %xmm0
|
|
movlpd %xmm0, 6(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 13(%edx), %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $14, %ebx
|
|
/* lea does not modify EFLAGS, so jnz still tests the result of the sub.  */
lea 14(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero)
|
|
# ifdef USE_AS_STPCPY
|
|
/* CF is set only when the byte at (%eax) is 0; sbb $-1 adds 1 - CF, so
   %eax steps past the last byte copied unless that byte is NUL.  */
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN
|
|
|
|
/* L(ExitTail15): copy exactly 15 bytes from (%ecx) to (%edx) using two
   64-bit movlpd moves at offsets 0 and 7 (overlapping by one byte), then
   return.  %eax = %edx, or for USE_AS_STPCPY the address of the last byte
   copied, 14(%edx).  USE_AS_STRNCPY: %ebx -= 15; if the result is non-zero,
   jump to L(StrncpyFillTailWithZero) with %ecx = 15(%edx).  */
.p2align 4
|
|
L(ExitTail15):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
movlpd 7(%ecx), %xmm0
|
|
movlpd %xmm0, 7(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 14(%edx), %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $15, %ebx
|
|
/* lea does not modify EFLAGS, so jnz still tests the result of the sub.  */
lea 15(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero)
|
|
# ifdef USE_AS_STPCPY
|
|
/* CF is set only when the byte at (%eax) is 0; sbb $-1 adds 1 - CF, so
   %eax steps past the last byte copied unless that byte is NUL.  */
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN
|
|
|
|
/* L(ExitTail16): copy exactly 16 bytes from (%ecx) to (%edx) using two
   64-bit movlpd moves at offsets 0 and 8, then return.  %eax = %edx, or for
   USE_AS_STPCPY the address of the last byte copied, 15(%edx).
   USE_AS_STRNCPY: %ebx -= 16; if the result is non-zero, jump to
   L(StrncpyFillTailWithZero) with %ecx = 16(%edx).  */
.p2align 4
|
|
L(ExitTail16):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
movlpd 8(%ecx), %xmm0
|
|
movlpd %xmm0, 8(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 15(%edx), %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
/* lea does not modify EFLAGS, so jnz still tests the result of the sub.  */
lea 16(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero)
|
|
# ifdef USE_AS_STPCPY
|
|
/* CF is set only when the byte at (%eax) is 0; sbb $-1 adds 1 - CF, so
   %eax steps past the last byte copied unless that byte is NUL.  */
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN
|
|
#endif
|
|
|
|
# ifdef USE_AS_STRNCPY
|
|
# ifndef USE_AS_STRCAT
|
|
CFI_PUSH (%esi)
|
|
CFI_PUSH (%edi)
|
|
# endif
|
|
/* L(StrncpyLeaveCase2OrCase3): dispatch on %eax.  Non-zero goes to
   L(Aligned64LeaveCase2); zero falls through to L(Aligned64LeaveCase3).
   NOTE(review): %eax is presumably a pmovmskb NUL mask computed by the
   caller's 64-byte loop (non-zero = a NUL byte was seen); confirm.  */
L(StrncpyLeaveCase2OrCase3):
|
|
test %eax, %eax
|
|
jnz L(Aligned64LeaveCase2)
|
|
|
|
/* L(Aligned64LeaveCase3): leave the aligned 64-byte loop when %eax was zero
   at L(StrncpyLeaveCase2OrCase3).  Re-biases the counter in %ebx by +48,
   then stores %xmm4, %xmm5 and %xmm6 at -64/-48/-32(%edx) one block at a
   time, adding 16 to %esi and taking 16 off %ebx per block, and jumps to
   L(CopyFrom1To16BytesCase3) as soon as the counter runs out (or after the
   third block).
   NOTE(review): %xmm4-%xmm6 are presumably the source blocks already loaded
   by the loop, and %edx has already been advanced past them; confirm.  */
L(Aligned64LeaveCase3):
|
|
add $48, %ebx
|
|
/* Signed: counter + 48 <= 0, so not even the first block is complete.  */
jle L(CopyFrom1To16BytesCase3)
|
|
movaps %xmm4, -64(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
/* Unsigned below-or-equal: the sub borrowed or reached exactly zero.  */
jbe L(CopyFrom1To16BytesCase3)
|
|
movaps %xmm5, -48(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(CopyFrom1To16BytesCase3)
|
|
movaps %xmm6, -32(%edx)
|
|
lea 16(%esi), %esi
|
|
/* Last adjustment uses lea: no flags are needed for the jmp below.  */
lea -16(%ebx), %ebx
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* L(Aligned64LeaveCase2): leave the aligned 64-byte loop when %eax was
   non-zero at L(StrncpyLeaveCase2OrCase3).  For each of %xmm4..%xmm7 in
   turn: byte-compare it against %xmm0 and put the 16-bit match mask in
   %eax; if the counter in %ebx has run out go to
   L(CopyFrom1To16BytesCase2OrCase3); if the mask is non-zero go to
   L(CopyFrom1To16Bytes); otherwise store the previous block at
   -64/-48/-32(%edx), add 16 to %esi and move on.  The fourth block always
   ends at L(CopyFrom1To16BytesCase2).
   pcmpeqb overwrites %xmm0 with the mask; whenever execution continues past
   a jnz the mask was all-zero, so %xmm0 is zero again for the next compare.
   NOTE(review): %xmm0 is presumably zero on entry so that the compares look
   for NUL bytes; it is set outside this chunk -- confirm.  */
L(Aligned64LeaveCase2):
|
|
pcmpeqb %xmm4, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
add $48, %ebx
|
|
/* Signed: counter + 48 <= 0, the length limit falls in this block.  */
jle L(CopyFrom1To16BytesCase2OrCase3)
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
|
pcmpeqb %xmm5, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
movaps %xmm4, -64(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
/* Unsigned below-or-equal: the sub borrowed or reached exactly zero.  */
jbe L(CopyFrom1To16BytesCase2OrCase3)
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
|
pcmpeqb %xmm6, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
movaps %xmm5, -48(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(CopyFrom1To16BytesCase2OrCase3)
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
|
pcmpeqb %xmm7, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
movaps %xmm6, -32(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
jmp L(CopyFrom1To16BytesCase2)
|
|
/* -------------------------------------------------- */
|
|
/* L(StrncpyExit1Case2OrCase3): store bytes 1..15 of %xmm1 (15 bytes) at
   (%edx) while keeping the 1 byte already present at 15(%edx), set
   %esi = 15, then dispatch on %eax.  */
L(StrncpyExit1Case2OrCase3):
|
|
movaps (%edx), %xmm6
|
|
/* Keep only the top 1 byte of the current destination block.  */
psrldq $15, %xmm6
|
|
mov $15, %esi
|
|
/* %xmm6 = bytes 1..15 of %xmm1 followed by the preserved byte.  */
palignr $1, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx)
|
|
/* Non-zero %eax selects Case2, zero selects Case3.  */
test %eax, %eax
|
|
jnz L(CopyFrom1To16BytesCase2)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* L(StrncpyExit2Case2OrCase3): store bytes 2..15 of %xmm1 (14 bytes) at
   (%edx) while keeping the 2 bytes already present at 14(%edx), set
   %esi = 14, then dispatch on %eax.  */
L(StrncpyExit2Case2OrCase3):
|
|
movaps (%edx), %xmm6
|
|
/* Keep only the top 2 bytes of the current destination block.  */
psrldq $14, %xmm6
|
|
mov $14, %esi
|
|
/* %xmm6 = bytes 2..15 of %xmm1 followed by the preserved bytes.  */
palignr $2, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx)
|
|
/* Non-zero %eax selects Case2, zero selects Case3.  */
test %eax, %eax
|
|
jnz L(CopyFrom1To16BytesCase2)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* L(StrncpyExit3Case2OrCase3): store bytes 3..15 of %xmm1 (13 bytes) at
   (%edx) while keeping the 3 bytes already present at 13(%edx), set
   %esi = 13, then dispatch on %eax.  */
L(StrncpyExit3Case2OrCase3):
|
|
movaps (%edx), %xmm6
|
|
/* Keep only the top 3 bytes of the current destination block.  */
psrldq $13, %xmm6
|
|
mov $13, %esi
|
|
/* %xmm6 = bytes 3..15 of %xmm1 followed by the preserved bytes.  */
palignr $3, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx)
|
|
/* Non-zero %eax selects Case2, zero selects Case3.  */
test %eax, %eax
|
|
jnz L(CopyFrom1To16BytesCase2)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* L(StrncpyExit4Case2OrCase3): store bytes 4..15 of %xmm1 (12 bytes) at
   (%edx) while keeping the 4 bytes already present at 12(%edx), set
   %esi = 12, then dispatch on %eax.  */
L(StrncpyExit4Case2OrCase3):
|
|
movaps (%edx), %xmm6
|
|
/* Keep only the top 4 bytes of the current destination block.  */
psrldq $12, %xmm6
|
|
mov $12, %esi
|
|
/* %xmm6 = bytes 4..15 of %xmm1 followed by the preserved bytes.  */
palignr $4, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx)
|
|
/* Non-zero %eax selects Case2, zero selects Case3.  */
test %eax, %eax
|
|
jnz L(CopyFrom1To16BytesCase2)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* L(StrncpyExit5Case2OrCase3): store bytes 5..15 of %xmm1 (11 bytes) at
   (%edx) while keeping the 5 bytes already present at 11(%edx), set
   %esi = 11, then dispatch on %eax.  */
L(StrncpyExit5Case2OrCase3):
|
|
movaps (%edx), %xmm6
|
|
/* Keep only the top 5 bytes of the current destination block.  */
psrldq $11, %xmm6
|
|
mov $11, %esi
|
|
/* %xmm6 = bytes 5..15 of %xmm1 followed by the preserved bytes.  */
palignr $5, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx)
|
|
/* Non-zero %eax selects Case2, zero selects Case3.  */
test %eax, %eax
|
|
jnz L(CopyFrom1To16BytesCase2)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* L(StrncpyExit6Case2OrCase3): store bytes 6..15 of %xmm1 (10 bytes) at
   (%edx) while keeping the 6 bytes already present at 10(%edx), set
   %esi = 10, then dispatch on %eax.  */
L(StrncpyExit6Case2OrCase3):
|
|
movaps (%edx), %xmm6
|
|
/* Keep only the top 6 bytes of the current destination block.  */
psrldq $10, %xmm6
|
|
mov $10, %esi
|
|
/* %xmm6 = bytes 6..15 of %xmm1 followed by the preserved bytes.  */
palignr $6, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx)
|
|
/* Non-zero %eax selects Case2, zero selects Case3.  */
test %eax, %eax
|
|
jnz L(CopyFrom1To16BytesCase2)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* L(StrncpyExit7Case2OrCase3): store bytes 7..15 of %xmm1 (9 bytes) at
   (%edx) while keeping the 7 bytes already present at 9(%edx), set
   %esi = 9, then dispatch on %eax.  */
L(StrncpyExit7Case2OrCase3):
|
|
movaps (%edx), %xmm6
|
|
/* Keep only the top 7 bytes of the current destination block.  */
psrldq $9, %xmm6
|
|
mov $9, %esi
|
|
/* %xmm6 = bytes 7..15 of %xmm1 followed by the preserved bytes.  */
palignr $7, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx)
|
|
/* Non-zero %eax selects Case2, zero selects Case3.  */
test %eax, %eax
|
|
jnz L(CopyFrom1To16BytesCase2)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* L(StrncpyExit8Case2OrCase3): store bytes 8..15 of %xmm1 (8 bytes) at
   (%edx) while keeping the 8 bytes already present at 8(%edx), set
   %esi = 8, then dispatch on %eax.  */
L(StrncpyExit8Case2OrCase3):
|
|
movaps (%edx), %xmm6
|
|
/* Keep only the top 8 bytes of the current destination block.  */
psrldq $8, %xmm6
|
|
mov $8, %esi
|
|
/* %xmm6 = bytes 8..15 of %xmm1 followed by the preserved bytes.  */
palignr $8, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx)
|
|
/* Non-zero %eax selects Case2, zero selects Case3.  */
test %eax, %eax
|
|
jnz L(CopyFrom1To16BytesCase2)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* L(StrncpyExit9Case2OrCase3): store bytes 9..15 of %xmm1 (7 bytes) at
   (%edx) while keeping the 9 bytes already present at 7(%edx), set
   %esi = 7, then dispatch on %eax.  */
L(StrncpyExit9Case2OrCase3):
|
|
movaps (%edx), %xmm6
|
|
/* Keep only the top 9 bytes of the current destination block.  */
psrldq $7, %xmm6
|
|
mov $7, %esi
|
|
/* %xmm6 = bytes 9..15 of %xmm1 followed by the preserved bytes.  */
palignr $9, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx)
|
|
/* Non-zero %eax selects Case2, zero selects Case3.  */
test %eax, %eax
|
|
jnz L(CopyFrom1To16BytesCase2)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* L(StrncpyExit10Case2OrCase3): store bytes 10..15 of %xmm1 (6 bytes) at
   (%edx) while keeping the 10 bytes already present at 6(%edx), set
   %esi = 6, then dispatch on %eax.  */
L(StrncpyExit10Case2OrCase3):
|
|
movaps (%edx), %xmm6
|
|
/* Keep only the top 10 bytes of the current destination block.  */
psrldq $6, %xmm6
|
|
mov $6, %esi
|
|
/* %xmm6 = bytes 10..15 of %xmm1 followed by the preserved bytes.  */
palignr $10, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx)
|
|
/* Non-zero %eax selects Case2, zero selects Case3.  */
test %eax, %eax
|
|
jnz L(CopyFrom1To16BytesCase2)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* L(StrncpyExit11Case2OrCase3): store bytes 11..15 of %xmm1 (5 bytes) at
   (%edx) while keeping the 11 bytes already present at 5(%edx), set
   %esi = 5, then dispatch on %eax.  */
L(StrncpyExit11Case2OrCase3):
|
|
movaps (%edx), %xmm6
|
|
/* Keep only the top 11 bytes of the current destination block.  */
psrldq $5, %xmm6
|
|
mov $5, %esi
|
|
/* %xmm6 = bytes 11..15 of %xmm1 followed by the preserved bytes.  */
palignr $11, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx)
|
|
/* Non-zero %eax selects Case2, zero selects Case3.  */
test %eax, %eax
|
|
jnz L(CopyFrom1To16BytesCase2)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* L(StrncpyExit12Case2OrCase3): store bytes 12..15 of %xmm1 (4 bytes) at
   (%edx) while keeping the 12 bytes already present at 4(%edx), set
   %esi = 4, then dispatch on %eax.  */
L(StrncpyExit12Case2OrCase3):
|
|
movaps (%edx), %xmm6
|
|
/* Keep only the top 12 bytes of the current destination block.  */
psrldq $4, %xmm6
|
|
mov $4, %esi
|
|
/* %xmm6 = bytes 12..15 of %xmm1 followed by the preserved bytes.  */
palignr $12, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx)
|
|
/* Non-zero %eax selects Case2, zero selects Case3.  */
test %eax, %eax
|
|
jnz L(CopyFrom1To16BytesCase2)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* L(StrncpyExit13Case2OrCase3): store bytes 13..15 of %xmm1 (3 bytes) at
   (%edx) while keeping the 13 bytes already present at 3(%edx), set
   %esi = 3, then dispatch on %eax.  */
L(StrncpyExit13Case2OrCase3):
|
|
movaps (%edx), %xmm6
|
|
/* Keep only the top 13 bytes of the current destination block.  */
psrldq $3, %xmm6
|
|
mov $3, %esi
|
|
/* %xmm6 = bytes 13..15 of %xmm1 followed by the preserved bytes.  */
palignr $13, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx)
|
|
/* Non-zero %eax selects Case2, zero selects Case3.  */
test %eax, %eax
|
|
jnz L(CopyFrom1To16BytesCase2)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* L(StrncpyExit14Case2OrCase3): store bytes 14..15 of %xmm1 (2 bytes) at
   (%edx) while keeping the 14 bytes already present at 2(%edx), set
   %esi = 2, then dispatch on %eax.  */
L(StrncpyExit14Case2OrCase3):
|
|
movaps (%edx), %xmm6
|
|
/* Keep only the top 14 bytes of the current destination block.  */
psrldq $2, %xmm6
|
|
mov $2, %esi
|
|
/* %xmm6 = bytes 14..15 of %xmm1 followed by the preserved bytes.  */
palignr $14, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx)
|
|
/* Non-zero %eax selects Case2, zero selects Case3.  */
test %eax, %eax
|
|
jnz L(CopyFrom1To16BytesCase2)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* L(StrncpyExit15Case2OrCase3): store byte 15 of %xmm1 (1 byte) at (%edx)
   while keeping the 15 bytes already present at 1(%edx), set %esi = 1,
   then dispatch on %eax.  */
L(StrncpyExit15Case2OrCase3):
|
|
movaps (%edx), %xmm6
|
|
/* Keep only the top 15 bytes of the current destination block.  */
psrldq $1, %xmm6
|
|
mov $1, %esi
|
|
/* %xmm6 = byte 15 of %xmm1 followed by the preserved bytes.  */
palignr $15, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx)
|
|
/* Non-zero %eax selects Case2, zero selects Case3.  */
test %eax, %eax
|
|
jnz L(CopyFrom1To16BytesCase2)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* L(StrncpyLeave1): length-limited exit from the shift-1 copy path.
   Re-biases the counter in %ebx by +48, then stores up to four 16-byte
   blocks at 0/16/32/48(%edx), adding 16 to %esi and taking 16 off %ebx per
   block.  The first two blocks are built with palignr $1 from %xmm1 and
   %xmm2; %xmm4 and %xmm5 are stored as they are.  As soon as the counter
   runs out the code goes to L(StrncpyExit1), which it also falls into after
   the fourth block.  The movaps loads at 31(%ecx) and 31+16(%ecx) need
   16-byte-aligned addresses.
   NOTE(review): %xmm1, %xmm2, %xmm4, %xmm5 and %xmm7 are set up outside
   this chunk -- presumably consecutive source blocks and blocks already
   merged by the caller's loop; confirm.  */
L(StrncpyLeave1):
|
|
movaps %xmm2, %xmm3
|
|
add $48, %ebx
|
|
/* Signed: counter + 48 <= 0, no full block may be stored.  */
jle L(StrncpyExit1)
|
|
palignr $1, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 31(%ecx), %xmm2
|
|
lea 16(%esi), %esi
|
|
movaps %xmm2, %xmm3
|
|
sub $16, %ebx
|
|
/* Unsigned below-or-equal: the sub borrowed or reached exactly zero.  */
jbe L(StrncpyExit1)
|
|
palignr $1, %xmm1, %xmm2
|
|
movaps %xmm2, 16(%edx)
|
|
movaps 31+16(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit1)
|
|
movaps %xmm2, %xmm1
|
|
movaps %xmm4, 32(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit1)
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
|
|
/* L(StrncpyExit1): store bytes 1..15 of %xmm1 (15 bytes) at (%edx,%esi)
   while keeping the 1 byte already present at the top of that 16-byte
   block, add 15 to %esi, and continue at L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit1):
|
|
movaps (%edx, %esi), %xmm6
|
|
/* Keep only the top 1 byte of the current destination block.  */
psrldq $15, %xmm6
|
|
/* %xmm6 = bytes 1..15 of %xmm1 followed by the preserved byte.  */
palignr $1, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx, %esi)
|
|
lea 15(%esi), %esi
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* L(StrncpyLeave2): length-limited exit from the shift-2 copy path.
   Re-biases the counter in %ebx by +48, then stores up to four 16-byte
   blocks at 0/16/32/48(%edx), adding 16 to %esi and taking 16 off %ebx per
   block.  The first two blocks are built with palignr $2 from %xmm1 and
   %xmm2; %xmm4 and %xmm5 are stored as they are.  As soon as the counter
   runs out the code goes to L(StrncpyExit2), which it also falls into after
   the fourth block.  The movaps loads at 30(%ecx) and 30+16(%ecx) need
   16-byte-aligned addresses.
   NOTE(review): %xmm1, %xmm2, %xmm4, %xmm5 and %xmm7 are set up outside
   this chunk -- presumably consecutive source blocks and blocks already
   merged by the caller's loop; confirm.  */
L(StrncpyLeave2):
|
|
movaps %xmm2, %xmm3
|
|
add $48, %ebx
|
|
/* Signed: counter + 48 <= 0, no full block may be stored.  */
jle L(StrncpyExit2)
|
|
palignr $2, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 30(%ecx), %xmm2
|
|
lea 16(%esi), %esi
|
|
movaps %xmm2, %xmm3
|
|
sub $16, %ebx
|
|
/* Unsigned below-or-equal: the sub borrowed or reached exactly zero.  */
jbe L(StrncpyExit2)
|
|
palignr $2, %xmm1, %xmm2
|
|
movaps %xmm2, 16(%edx)
|
|
movaps 30+16(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit2)
|
|
movaps %xmm2, %xmm1
|
|
movaps %xmm4, 32(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit2)
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
|
|
/* L(StrncpyExit2): store bytes 2..15 of %xmm1 (14 bytes) at (%edx,%esi)
   while keeping the 2 bytes already present at the top of that 16-byte
   block, add 14 to %esi, and continue at L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit2):
|
|
movaps (%edx, %esi), %xmm6
|
|
/* Keep only the top 2 bytes of the current destination block.  */
psrldq $14, %xmm6
|
|
/* %xmm6 = bytes 2..15 of %xmm1 followed by the preserved bytes.  */
palignr $2, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx, %esi)
|
|
lea 14(%esi), %esi
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* L(StrncpyLeave3): length-limited exit from the shift-3 copy path.
   Re-biases the counter in %ebx by +48, then stores up to four 16-byte
   blocks at 0/16/32/48(%edx), adding 16 to %esi and taking 16 off %ebx per
   block.  The first two blocks are built with palignr $3 from %xmm1 and
   %xmm2; %xmm4 and %xmm5 are stored as they are.  As soon as the counter
   runs out the code goes to L(StrncpyExit3), which it also falls into after
   the fourth block.  The movaps loads at 29(%ecx) and 29+16(%ecx) need
   16-byte-aligned addresses.
   NOTE(review): %xmm1, %xmm2, %xmm4, %xmm5 and %xmm7 are set up outside
   this chunk -- presumably consecutive source blocks and blocks already
   merged by the caller's loop; confirm.  */
L(StrncpyLeave3):
|
|
movaps %xmm2, %xmm3
|
|
add $48, %ebx
|
|
/* Signed: counter + 48 <= 0, no full block may be stored.  */
jle L(StrncpyExit3)
|
|
palignr $3, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 29(%ecx), %xmm2
|
|
lea 16(%esi), %esi
|
|
movaps %xmm2, %xmm3
|
|
sub $16, %ebx
|
|
/* Unsigned below-or-equal: the sub borrowed or reached exactly zero.  */
jbe L(StrncpyExit3)
|
|
palignr $3, %xmm1, %xmm2
|
|
movaps %xmm2, 16(%edx)
|
|
movaps 29+16(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit3)
|
|
movaps %xmm2, %xmm1
|
|
movaps %xmm4, 32(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit3)
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
|
|
/* L(StrncpyExit3): store bytes 3..15 of %xmm1 (13 bytes) at (%edx,%esi)
   while keeping the 3 bytes already present at the top of that 16-byte
   block, add 13 to %esi, and continue at L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit3):
|
|
movaps (%edx, %esi), %xmm6
|
|
/* Keep only the top 3 bytes of the current destination block.  */
psrldq $13, %xmm6
|
|
/* %xmm6 = bytes 3..15 of %xmm1 followed by the preserved bytes.  */
palignr $3, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx, %esi)
|
|
lea 13(%esi), %esi
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* L(StrncpyLeave4): length-limited exit from the shift-4 copy path.
   Re-biases the counter in %ebx by +48, then stores up to four 16-byte
   blocks at 0/16/32/48(%edx), adding 16 to %esi and taking 16 off %ebx per
   block.  The first two blocks are built with palignr $4 from %xmm1 and
   %xmm2; %xmm4 and %xmm5 are stored as they are.  As soon as the counter
   runs out the code goes to L(StrncpyExit4), which it also falls into after
   the fourth block.  The movaps loads at 28(%ecx) and 28+16(%ecx) need
   16-byte-aligned addresses.
   NOTE(review): %xmm1, %xmm2, %xmm4, %xmm5 and %xmm7 are set up outside
   this chunk -- presumably consecutive source blocks and blocks already
   merged by the caller's loop; confirm.  */
L(StrncpyLeave4):
|
|
movaps %xmm2, %xmm3
|
|
add $48, %ebx
|
|
/* Signed: counter + 48 <= 0, no full block may be stored.  */
jle L(StrncpyExit4)
|
|
palignr $4, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 28(%ecx), %xmm2
|
|
lea 16(%esi), %esi
|
|
movaps %xmm2, %xmm3
|
|
sub $16, %ebx
|
|
/* Unsigned below-or-equal: the sub borrowed or reached exactly zero.  */
jbe L(StrncpyExit4)
|
|
palignr $4, %xmm1, %xmm2
|
|
movaps %xmm2, 16(%edx)
|
|
movaps 28+16(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit4)
|
|
movaps %xmm2, %xmm1
|
|
movaps %xmm4, 32(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit4)
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
|
|
/* L(StrncpyExit4): store bytes 4..15 of %xmm1 (12 bytes) at (%edx,%esi)
   while keeping the 4 bytes already present at the top of that 16-byte
   block, add 12 to %esi, and continue at L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit4):
|
|
movaps (%edx, %esi), %xmm6
|
|
/* Keep only the top 4 bytes of the current destination block.  */
psrldq $12, %xmm6
|
|
/* %xmm6 = bytes 4..15 of %xmm1 followed by the preserved bytes.  */
palignr $4, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx, %esi)
|
|
lea 12(%esi), %esi
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* L(StrncpyLeave5): length-limited exit from the shift-5 copy path.
   Re-biases the counter in %ebx by +48, then stores up to four 16-byte
   blocks at 0/16/32/48(%edx), adding 16 to %esi and taking 16 off %ebx per
   block.  The first two blocks are built with palignr $5 from %xmm1 and
   %xmm2; %xmm4 and %xmm5 are stored as they are.  As soon as the counter
   runs out the code goes to L(StrncpyExit5), which it also falls into after
   the fourth block.  The movaps loads at 27(%ecx) and 27+16(%ecx) need
   16-byte-aligned addresses.
   NOTE(review): %xmm1, %xmm2, %xmm4, %xmm5 and %xmm7 are set up outside
   this chunk -- presumably consecutive source blocks and blocks already
   merged by the caller's loop; confirm.  */
L(StrncpyLeave5):
|
|
movaps %xmm2, %xmm3
|
|
add $48, %ebx
|
|
/* Signed: counter + 48 <= 0, no full block may be stored.  */
jle L(StrncpyExit5)
|
|
palignr $5, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 27(%ecx), %xmm2
|
|
lea 16(%esi), %esi
|
|
movaps %xmm2, %xmm3
|
|
sub $16, %ebx
|
|
/* Unsigned below-or-equal: the sub borrowed or reached exactly zero.  */
jbe L(StrncpyExit5)
|
|
palignr $5, %xmm1, %xmm2
|
|
movaps %xmm2, 16(%edx)
|
|
movaps 27+16(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit5)
|
|
movaps %xmm2, %xmm1
|
|
movaps %xmm4, 32(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit5)
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
|
|
/* L(StrncpyExit5): store bytes 5..15 of %xmm1 (11 bytes) at (%edx,%esi)
   while keeping the 5 bytes already present at the top of that 16-byte
   block, add 11 to %esi, and continue at L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit5):
|
|
movaps (%edx, %esi), %xmm6
|
|
/* Keep only the top 5 bytes of the current destination block.  */
psrldq $11, %xmm6
|
|
/* %xmm6 = bytes 5..15 of %xmm1 followed by the preserved bytes.  */
palignr $5, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx, %esi)
|
|
lea 11(%esi), %esi
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* L(StrncpyLeave6): length-limited exit from the shift-6 copy path.
   Re-biases the counter in %ebx by +48, then stores up to four 16-byte
   blocks at 0/16/32/48(%edx), adding 16 to %esi and taking 16 off %ebx per
   block.  The first two blocks are built with palignr $6 from %xmm1 and
   %xmm2; %xmm4 and %xmm5 are stored as they are.  As soon as the counter
   runs out the code goes to L(StrncpyExit6), which it also falls into after
   the fourth block.  The movaps loads at 26(%ecx) and 26+16(%ecx) need
   16-byte-aligned addresses.
   NOTE(review): %xmm1, %xmm2, %xmm4, %xmm5 and %xmm7 are set up outside
   this chunk -- presumably consecutive source blocks and blocks already
   merged by the caller's loop; confirm.  */
L(StrncpyLeave6):
|
|
movaps %xmm2, %xmm3
|
|
add $48, %ebx
|
|
/* Signed: counter + 48 <= 0, no full block may be stored.  */
jle L(StrncpyExit6)
|
|
palignr $6, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 26(%ecx), %xmm2
|
|
lea 16(%esi), %esi
|
|
movaps %xmm2, %xmm3
|
|
sub $16, %ebx
|
|
/* Unsigned below-or-equal: the sub borrowed or reached exactly zero.  */
jbe L(StrncpyExit6)
|
|
palignr $6, %xmm1, %xmm2
|
|
movaps %xmm2, 16(%edx)
|
|
movaps 26+16(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit6)
|
|
movaps %xmm2, %xmm1
|
|
movaps %xmm4, 32(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit6)
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
|
|
/* L(StrncpyExit6): store bytes 6..15 of %xmm1 (10 bytes) at (%edx,%esi)
   while keeping the 6 bytes already present at the top of that 16-byte
   block, add 10 to %esi, and continue at L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit6):
|
|
movaps (%edx, %esi), %xmm6
|
|
/* Keep only the top 6 bytes of the current destination block.  */
psrldq $10, %xmm6
|
|
/* %xmm6 = bytes 6..15 of %xmm1 followed by the preserved bytes.  */
palignr $6, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx, %esi)
|
|
lea 10(%esi), %esi
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* L(StrncpyLeave7): length-limited exit from the shift-7 copy path.
   Re-biases the counter in %ebx by +48, then stores up to four 16-byte
   blocks at 0/16/32/48(%edx), adding 16 to %esi and taking 16 off %ebx per
   block.  The first two blocks are built with palignr $7 from %xmm1 and
   %xmm2; %xmm4 and %xmm5 are stored as they are.  As soon as the counter
   runs out the code goes to L(StrncpyExit7), which it also falls into after
   the fourth block.  The movaps loads at 25(%ecx) and 25+16(%ecx) need
   16-byte-aligned addresses.
   NOTE(review): %xmm1, %xmm2, %xmm4, %xmm5 and %xmm7 are set up outside
   this chunk -- presumably consecutive source blocks and blocks already
   merged by the caller's loop; confirm.  */
L(StrncpyLeave7):
|
|
movaps %xmm2, %xmm3
|
|
add $48, %ebx
|
|
/* Signed: counter + 48 <= 0, no full block may be stored.  */
jle L(StrncpyExit7)
|
|
palignr $7, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 25(%ecx), %xmm2
|
|
lea 16(%esi), %esi
|
|
movaps %xmm2, %xmm3
|
|
sub $16, %ebx
|
|
/* Unsigned below-or-equal: the sub borrowed or reached exactly zero.  */
jbe L(StrncpyExit7)
|
|
palignr $7, %xmm1, %xmm2
|
|
movaps %xmm2, 16(%edx)
|
|
movaps 25+16(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit7)
|
|
movaps %xmm2, %xmm1
|
|
movaps %xmm4, 32(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit7)
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
|
|
/* L(StrncpyExit7): store bytes 7..15 of %xmm1 (9 bytes) at (%edx,%esi)
   while keeping the 7 bytes already present at the top of that 16-byte
   block, add 9 to %esi, and continue at L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit7):
|
|
movaps (%edx, %esi), %xmm6
|
|
/* Keep only the top 7 bytes of the current destination block.  */
psrldq $9, %xmm6
|
|
/* %xmm6 = bytes 7..15 of %xmm1 followed by the preserved bytes.  */
palignr $7, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx, %esi)
|
|
lea 9(%esi), %esi
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* L(StrncpyLeave8): length-limited exit from the shift-8 copy path.
   Re-biases the counter in %ebx by +48, then stores up to four 16-byte
   blocks at 0/16/32/48(%edx), adding 16 to %esi and taking 16 off %ebx per
   block.  The first two blocks are built with palignr $8 from %xmm1 and
   %xmm2; %xmm4 and %xmm5 are stored as they are.  As soon as the counter
   runs out the code goes to L(StrncpyExit8), which it also falls into after
   the fourth block.  The movaps loads at 24(%ecx) and 24+16(%ecx) need
   16-byte-aligned addresses.
   NOTE(review): %xmm1, %xmm2, %xmm4, %xmm5 and %xmm7 are set up outside
   this chunk -- presumably consecutive source blocks and blocks already
   merged by the caller's loop; confirm.  */
L(StrncpyLeave8):
|
|
movaps %xmm2, %xmm3
|
|
add $48, %ebx
|
|
/* Signed: counter + 48 <= 0, no full block may be stored.  */
jle L(StrncpyExit8)
|
|
palignr $8, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 24(%ecx), %xmm2
|
|
lea 16(%esi), %esi
|
|
movaps %xmm2, %xmm3
|
|
sub $16, %ebx
|
|
/* Unsigned below-or-equal: the sub borrowed or reached exactly zero.  */
jbe L(StrncpyExit8)
|
|
palignr $8, %xmm1, %xmm2
|
|
movaps %xmm2, 16(%edx)
|
|
movaps 24+16(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit8)
|
|
movaps %xmm2, %xmm1
|
|
movaps %xmm4, 32(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit8)
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
|
|
/* L(StrncpyExit8): store bytes 8..15 of %xmm1 (8 bytes) at (%edx,%esi)
   while keeping the 8 bytes already present at the top of that 16-byte
   block, add 8 to %esi, and continue at L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit8):
|
|
movaps (%edx, %esi), %xmm6
|
|
/* Keep only the top 8 bytes of the current destination block.  */
psrldq $8, %xmm6
|
|
/* %xmm6 = bytes 8..15 of %xmm1 followed by the preserved bytes.  */
palignr $8, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx, %esi)
|
|
lea 8(%esi), %esi
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* L(StrncpyLeave9): length-limited exit from the shift-9 copy path.
   Re-biases the counter in %ebx by +48, then stores up to four 16-byte
   blocks at 0/16/32/48(%edx), adding 16 to %esi and taking 16 off %ebx per
   block.  The first two blocks are built with palignr $9 from %xmm1 and
   %xmm2; %xmm4 and %xmm5 are stored as they are.  As soon as the counter
   runs out the code goes to L(StrncpyExit9), which it also falls into after
   the fourth block.  The movaps loads at 23(%ecx) and 23+16(%ecx) need
   16-byte-aligned addresses.
   NOTE(review): %xmm1, %xmm2, %xmm4, %xmm5 and %xmm7 are set up outside
   this chunk -- presumably consecutive source blocks and blocks already
   merged by the caller's loop; confirm.  */
L(StrncpyLeave9):
|
|
movaps %xmm2, %xmm3
|
|
add $48, %ebx
|
|
/* Signed: counter + 48 <= 0, no full block may be stored.  */
jle L(StrncpyExit9)
|
|
palignr $9, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 23(%ecx), %xmm2
|
|
lea 16(%esi), %esi
|
|
movaps %xmm2, %xmm3
|
|
sub $16, %ebx
|
|
/* Unsigned below-or-equal: the sub borrowed or reached exactly zero.  */
jbe L(StrncpyExit9)
|
|
palignr $9, %xmm1, %xmm2
|
|
movaps %xmm2, 16(%edx)
|
|
movaps 23+16(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit9)
|
|
movaps %xmm2, %xmm1
|
|
movaps %xmm4, 32(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit9)
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
|
|
/* L(StrncpyExit9): store bytes 9..15 of %xmm1 (7 bytes) at (%edx,%esi)
   while keeping the 9 bytes already present at the top of that 16-byte
   block, add 7 to %esi, and continue at L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit9):
|
|
movaps (%edx, %esi), %xmm6
|
|
/* Keep only the top 9 bytes of the current destination block.  */
psrldq $7, %xmm6
|
|
/* %xmm6 = bytes 9..15 of %xmm1 followed by the preserved bytes.  */
palignr $9, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx, %esi)
|
|
lea 7(%esi), %esi
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* L(StrncpyLeave10): length-limited exit from the shift-10 copy path.
   Re-biases the counter in %ebx by +48, then stores up to four 16-byte
   blocks at 0/16/32/48(%edx), adding 16 to %esi and taking 16 off %ebx per
   block.  The first two blocks are built with palignr $10 from %xmm1 and
   %xmm2; %xmm4 and %xmm5 are stored as they are.  As soon as the counter
   runs out the code goes to L(StrncpyExit10), which it also falls into
   after the fourth block.  The movaps loads at 22(%ecx) and 22+16(%ecx)
   need 16-byte-aligned addresses.
   NOTE(review): %xmm1, %xmm2, %xmm4, %xmm5 and %xmm7 are set up outside
   this chunk -- presumably consecutive source blocks and blocks already
   merged by the caller's loop; confirm.  */
L(StrncpyLeave10):
|
|
movaps %xmm2, %xmm3
|
|
add $48, %ebx
|
|
/* Signed: counter + 48 <= 0, no full block may be stored.  */
jle L(StrncpyExit10)
|
|
palignr $10, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 22(%ecx), %xmm2
|
|
lea 16(%esi), %esi
|
|
movaps %xmm2, %xmm3
|
|
sub $16, %ebx
|
|
/* Unsigned below-or-equal: the sub borrowed or reached exactly zero.  */
jbe L(StrncpyExit10)
|
|
palignr $10, %xmm1, %xmm2
|
|
movaps %xmm2, 16(%edx)
|
|
movaps 22+16(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit10)
|
|
movaps %xmm2, %xmm1
|
|
movaps %xmm4, 32(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit10)
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
|
|
/* L(StrncpyExit10): store bytes 10..15 of %xmm1 (6 bytes) at (%edx,%esi)
   while keeping the 10 bytes already present at the top of that 16-byte
   block, add 6 to %esi, and continue at L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit10):
|
|
movaps (%edx, %esi), %xmm6
|
|
/* Keep only the top 10 bytes of the current destination block.  */
psrldq $6, %xmm6
|
|
/* %xmm6 = bytes 10..15 of %xmm1 followed by the preserved bytes.  */
palignr $10, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx, %esi)
|
|
lea 6(%esi), %esi
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* L(StrncpyLeave11): length-limited exit from the shift-11 copy path.
   Re-biases the counter in %ebx by +48, then stores up to four 16-byte
   blocks at 0/16/32/48(%edx), adding 16 to %esi and taking 16 off %ebx per
   block.  The first two blocks are built with palignr $11 from %xmm1 and
   %xmm2; %xmm4 and %xmm5 are stored as they are.  As soon as the counter
   runs out the code goes to L(StrncpyExit11), which it also falls into
   after the fourth block.  The movaps loads at 21(%ecx) and 21+16(%ecx)
   need 16-byte-aligned addresses.
   NOTE(review): %xmm1, %xmm2, %xmm4, %xmm5 and %xmm7 are set up outside
   this chunk -- presumably consecutive source blocks and blocks already
   merged by the caller's loop; confirm.  */
L(StrncpyLeave11):
|
|
movaps %xmm2, %xmm3
|
|
add $48, %ebx
|
|
/* Signed: counter + 48 <= 0, no full block may be stored.  */
jle L(StrncpyExit11)
|
|
palignr $11, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 21(%ecx), %xmm2
|
|
lea 16(%esi), %esi
|
|
movaps %xmm2, %xmm3
|
|
sub $16, %ebx
|
|
/* Unsigned below-or-equal: the sub borrowed or reached exactly zero.  */
jbe L(StrncpyExit11)
|
|
palignr $11, %xmm1, %xmm2
|
|
movaps %xmm2, 16(%edx)
|
|
movaps 21+16(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit11)
|
|
movaps %xmm2, %xmm1
|
|
movaps %xmm4, 32(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit11)
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
|
|
/* L(StrncpyExit11): store bytes 11..15 of %xmm1 (5 bytes) at (%edx,%esi)
   while keeping the 11 bytes already present at the top of that 16-byte
   block, add 5 to %esi, and continue at L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit11):
|
|
movaps (%edx, %esi), %xmm6
|
|
/* Keep only the top 11 bytes of the current destination block.  */
psrldq $5, %xmm6
|
|
/* %xmm6 = bytes 11..15 of %xmm1 followed by the preserved bytes.  */
palignr $11, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx, %esi)
|
|
lea 5(%esi), %esi
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* L(StrncpyLeave12): length-limited exit from the shift-12 copy path.
   Re-biases the counter in %ebx by +48, then stores up to four 16-byte
   blocks at 0/16/32/48(%edx), adding 16 to %esi and taking 16 off %ebx per
   block.  The first two blocks are built with palignr $12 from %xmm1 and
   %xmm2; %xmm4 and %xmm5 are stored as they are.  As soon as the counter
   runs out the code goes to L(StrncpyExit12), which it also falls into
   after the fourth block.  The movaps loads at 20(%ecx) and 20+16(%ecx)
   need 16-byte-aligned addresses.
   NOTE(review): %xmm1, %xmm2, %xmm4, %xmm5 and %xmm7 are set up outside
   this chunk -- presumably consecutive source blocks and blocks already
   merged by the caller's loop; confirm.  */
L(StrncpyLeave12):
|
|
movaps %xmm2, %xmm3
|
|
add $48, %ebx
|
|
/* Signed: counter + 48 <= 0, no full block may be stored.  */
jle L(StrncpyExit12)
|
|
palignr $12, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 20(%ecx), %xmm2
|
|
lea 16(%esi), %esi
|
|
movaps %xmm2, %xmm3
|
|
sub $16, %ebx
|
|
/* Unsigned below-or-equal: the sub borrowed or reached exactly zero.  */
jbe L(StrncpyExit12)
|
|
palignr $12, %xmm1, %xmm2
|
|
movaps %xmm2, 16(%edx)
|
|
movaps 20+16(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit12)
|
|
movaps %xmm2, %xmm1
|
|
movaps %xmm4, 32(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit12)
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
|
|
/* L(StrncpyExit12): store bytes 12..15 of %xmm1 (4 bytes) at (%edx,%esi)
   while keeping the 12 bytes already present at the top of that 16-byte
   block, add 4 to %esi, and continue at L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit12):
|
|
movaps (%edx, %esi), %xmm6
|
|
/* Keep only the top 12 bytes of the current destination block.  */
psrldq $4, %xmm6
|
|
/* %xmm6 = bytes 12..15 of %xmm1 followed by the preserved bytes.  */
palignr $12, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx, %esi)
|
|
lea 4(%esi), %esi
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* L(StrncpyLeave13): length-limited exit from the shift-13 copy path.
   Re-biases the counter in %ebx by +48, then stores up to four 16-byte
   blocks at 0/16/32/48(%edx), adding 16 to %esi and taking 16 off %ebx per
   block.  The first two blocks are built with palignr $13 from %xmm1 and
   %xmm2; %xmm4 and %xmm5 are stored as they are.  As soon as the counter
   runs out the code goes to L(StrncpyExit13), which it also falls into
   after the fourth block.  The movaps loads at 19(%ecx) and 19+16(%ecx)
   need 16-byte-aligned addresses.
   NOTE(review): %xmm1, %xmm2, %xmm4, %xmm5 and %xmm7 are set up outside
   this chunk -- presumably consecutive source blocks and blocks already
   merged by the caller's loop; confirm.  */
L(StrncpyLeave13):
|
|
movaps %xmm2, %xmm3
|
|
add $48, %ebx
|
|
/* Signed: counter + 48 <= 0, no full block may be stored.  */
jle L(StrncpyExit13)
|
|
palignr $13, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 19(%ecx), %xmm2
|
|
lea 16(%esi), %esi
|
|
movaps %xmm2, %xmm3
|
|
sub $16, %ebx
|
|
/* Unsigned below-or-equal: the sub borrowed or reached exactly zero.  */
jbe L(StrncpyExit13)
|
|
palignr $13, %xmm1, %xmm2
|
|
movaps %xmm2, 16(%edx)
|
|
movaps 19+16(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit13)
|
|
movaps %xmm2, %xmm1
|
|
movaps %xmm4, 32(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit13)
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
|
|
/* L(StrncpyExit13): store bytes 13..15 of %xmm1 (3 bytes) at (%edx,%esi)
   while keeping the 13 bytes already present at the top of that 16-byte
   block, add 3 to %esi, and continue at L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit13):
|
|
movaps (%edx, %esi), %xmm6
|
|
/* Keep only the top 13 bytes of the current destination block.  */
psrldq $3, %xmm6
|
|
/* %xmm6 = bytes 13..15 of %xmm1 followed by the preserved bytes.  */
palignr $13, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx, %esi)
|
|
lea 3(%esi), %esi
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* L(StrncpyLeave14): length-limited exit from the shift-14 copy path.
   Re-biases the counter in %ebx by +48, then stores up to four 16-byte
   blocks at 0/16/32/48(%edx), adding 16 to %esi and taking 16 off %ebx per
   block.  The first two blocks are built with palignr $14 from %xmm1 and
   %xmm2; %xmm4 and %xmm5 are stored as they are.  As soon as the counter
   runs out the code goes to L(StrncpyExit14), which it also falls into
   after the fourth block.  The movaps loads at 18(%ecx) and 18+16(%ecx)
   need 16-byte-aligned addresses.
   NOTE(review): %xmm1, %xmm2, %xmm4, %xmm5 and %xmm7 are set up outside
   this chunk -- presumably consecutive source blocks and blocks already
   merged by the caller's loop; confirm.  */
L(StrncpyLeave14):
|
|
movaps %xmm2, %xmm3
|
|
add $48, %ebx
|
|
/* Signed: counter + 48 <= 0, no full block may be stored.  */
jle L(StrncpyExit14)
|
|
palignr $14, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 18(%ecx), %xmm2
|
|
lea 16(%esi), %esi
|
|
movaps %xmm2, %xmm3
|
|
sub $16, %ebx
|
|
/* Unsigned below-or-equal: the sub borrowed or reached exactly zero.  */
jbe L(StrncpyExit14)
|
|
palignr $14, %xmm1, %xmm2
|
|
movaps %xmm2, 16(%edx)
|
|
movaps 18+16(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit14)
|
|
movaps %xmm2, %xmm1
|
|
movaps %xmm4, 32(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit14)
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
|
|
/* L(StrncpyExit14): store bytes 14..15 of %xmm1 (2 bytes) at (%edx,%esi)
   while keeping the 14 bytes already present at the top of that 16-byte
   block, add 2 to %esi, and continue at L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit14):
|
|
movaps (%edx, %esi), %xmm6
|
|
/* Keep only the top 14 bytes of the current destination block.  */
psrldq $2, %xmm6
|
|
/* %xmm6 = bytes 14..15 of %xmm1 followed by the preserved bytes.  */
palignr $14, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx, %esi)
|
|
lea 2(%esi), %esi
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* L(StrncpyLeave15): length-limited exit from the shift-15 copy path.
   Re-biases the counter in %ebx by +48, then stores up to four 16-byte
   blocks at 0/16/32/48(%edx), adding 16 to %esi and taking 16 off %ebx per
   block.  The first two blocks are built with palignr $15 from %xmm1 and
   %xmm2; %xmm4 and %xmm5 are stored as they are.  As soon as the counter
   runs out the code goes to L(StrncpyExit15), which it also falls into
   after the fourth block.  The movaps loads at 17(%ecx) and 17+16(%ecx)
   need 16-byte-aligned addresses.
   NOTE(review): %xmm1, %xmm2, %xmm4, %xmm5 and %xmm7 are set up outside
   this chunk -- presumably consecutive source blocks and blocks already
   merged by the caller's loop; confirm.  */
L(StrncpyLeave15):
|
|
movaps %xmm2, %xmm3
|
|
add $48, %ebx
|
|
/* Signed: counter + 48 <= 0, no full block may be stored.  */
jle L(StrncpyExit15)
|
|
palignr $15, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 17(%ecx), %xmm2
|
|
lea 16(%esi), %esi
|
|
movaps %xmm2, %xmm3
|
|
sub $16, %ebx
|
|
/* Unsigned below-or-equal: the sub borrowed or reached exactly zero.  */
jbe L(StrncpyExit15)
|
|
palignr $15, %xmm1, %xmm2
|
|
movaps %xmm2, 16(%edx)
|
|
movaps 17+16(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit15)
|
|
movaps %xmm2, %xmm1
|
|
movaps %xmm4, 32(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit15)
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
|
|
/* L(StrncpyExit15): store byte 15 of %xmm1 (1 byte) at (%edx,%esi) while
   keeping the 15 bytes already present at the top of that 16-byte block,
   add 1 to %esi, and continue at L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit15):
|
|
movaps (%edx, %esi), %xmm6
|
|
/* Keep only the top 15 bytes of the current destination block.  */
psrldq $1, %xmm6
|
|
/* %xmm6 = byte 15 of %xmm1 followed by the preserved bytes.  */
palignr $15, %xmm1, %xmm6
|
|
movaps %xmm6, (%edx, %esi)
|
|
lea 1(%esi), %esi
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
# endif
|
|
|
|
# ifndef USE_AS_STRCAT
|
|
# ifdef USE_AS_STRNCPY
|
|
CFI_POP (%esi)
|
|
CFI_POP (%edi)
|
|
|
|
/* L(ExitTail0): nothing to copy; return %edx in %eax.  */
.p2align 4
|
|
L(ExitTail0):
|
|
movl %edx, %eax
|
|
RETURN
|
|
|
|
/* L(StrncpyExit15Bytes): short strncpy case of 9..15 bytes.  For k = 9..14
   in turn: if the counter in %ebx equals k, or source byte k-1 at (%ecx)
   is NUL, finish through L(ExitTailk).  If none of those match, copy 15
   bytes from (%ecx) to (%edx) with two overlapping 64-bit movlpd moves
   (offsets 0 and 7) and return %edx in %eax, or for USE_AS_STPCPY
   14(%edx), advanced by one when the byte there is not NUL.
   NOTE(review): the caller has presumably already established that source
   bytes 0..7 are non-NUL and that %ebx is between 9 and 15; confirm.  */
.p2align 4
|
|
L(StrncpyExit15Bytes):
|
|
cmp $9, %ebx
|
|
je L(ExitTail9)
|
|
cmpb $0, 8(%ecx)
|
|
jz L(ExitTail9)
|
|
cmp $10, %ebx
|
|
je L(ExitTail10)
|
|
cmpb $0, 9(%ecx)
|
|
jz L(ExitTail10)
|
|
cmp $11, %ebx
|
|
je L(ExitTail11)
|
|
cmpb $0, 10(%ecx)
|
|
jz L(ExitTail11)
|
|
cmp $12, %ebx
|
|
je L(ExitTail12)
|
|
cmpb $0, 11(%ecx)
|
|
jz L(ExitTail12)
|
|
cmp $13, %ebx
|
|
je L(ExitTail13)
|
|
cmpb $0, 12(%ecx)
|
|
jz L(ExitTail13)
|
|
cmp $14, %ebx
|
|
je L(ExitTail14)
|
|
cmpb $0, 13(%ecx)
|
|
jz L(ExitTail14)
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
movlpd 7(%ecx), %xmm0
|
|
movlpd %xmm0, 7(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 14(%edx), %eax
|
|
/* CF is set only when the byte at (%eax) is 0; sbb $-1 adds 1 - CF, so
   %eax steps past the last byte copied unless that byte is NUL.  */
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
RETURN
|
|
|
|
/* L(StrncpyExit8Bytes): short strncpy case of 1..8 bytes.  For k = 1..7 in
   turn: if the counter in %ebx equals k, or source byte k-1 at (%ecx) is
   NUL, finish through L(ExitTailk).  If none of those match, copy 8 bytes
   from (%ecx) to (%edx) with one 64-bit movlpd and return %edx in %eax, or
   for USE_AS_STPCPY 7(%edx), advanced by one when the byte there is not
   NUL.
   NOTE(review): the caller has presumably already established that %ebx is
   between 1 and 8; confirm.  */
.p2align 4
|
|
L(StrncpyExit8Bytes):
|
|
cmp $1, %ebx
|
|
je L(ExitTail1)
|
|
cmpb $0, (%ecx)
|
|
jz L(ExitTail1)
|
|
cmp $2, %ebx
|
|
je L(ExitTail2)
|
|
cmpb $0, 1(%ecx)
|
|
jz L(ExitTail2)
|
|
cmp $3, %ebx
|
|
je L(ExitTail3)
|
|
cmpb $0, 2(%ecx)
|
|
jz L(ExitTail3)
|
|
cmp $4, %ebx
|
|
je L(ExitTail4)
|
|
cmpb $0, 3(%ecx)
|
|
jz L(ExitTail4)
|
|
cmp $5, %ebx
|
|
je L(ExitTail5)
|
|
cmpb $0, 4(%ecx)
|
|
jz L(ExitTail5)
|
|
cmp $6, %ebx
|
|
je L(ExitTail6)
|
|
cmpb $0, 5(%ecx)
|
|
jz L(ExitTail6)
|
|
cmp $7, %ebx
|
|
je L(ExitTail7)
|
|
cmpb $0, 6(%ecx)
|
|
jz L(ExitTail7)
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 7(%edx), %eax
|
|
/* CF is set only when the byte at (%eax) is 0; sbb $-1 adds 1 - CF, so
   %eax steps past the last byte copied unless that byte is NUL.  */
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
RETURN
|
|
# endif
|
|
|
|
|
|
END (STRCPY)
|
|
# endif
|
|
#endif
|