2010-01-12 19:22:03 +00:00
|
|
|
/* memcpy with SSSE3
|
2022-01-01 18:54:23 +00:00
|
|
|
Copyright (C) 2010-2022 Free Software Foundation, Inc.
|
2010-01-12 19:22:03 +00:00
|
|
|
This file is part of the GNU C Library.
|
|
|
|
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
|
|
modify it under the terms of the GNU Lesser General Public
|
|
|
|
License as published by the Free Software Foundation; either
|
|
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
Lesser General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
2012-02-09 23:18:22 +00:00
|
|
|
License along with the GNU C Library; if not, see
|
Prefer https to http for gnu.org and fsf.org URLs
Also, change sources.redhat.com to sourceware.org.
This patch was automatically generated by running the following shell
script, which uses GNU sed, and which avoids modifying files imported
from upstream:
sed -ri '
s,(http|ftp)(://(.*\.)?(gnu|fsf|sourceware)\.org($|[^.]|\.[^a-z])),https\2,g
s,(http|ftp)(://(.*\.)?)sources\.redhat\.com($|[^.]|\.[^a-z]),https\2sourceware.org\4,g
' \
$(find $(git ls-files) -prune -type f \
! -name '*.po' \
! -name 'ChangeLog*' \
! -path COPYING ! -path COPYING.LIB \
! -path manual/fdl-1.3.texi ! -path manual/lgpl-2.1.texi \
! -path manual/texinfo.tex ! -path scripts/config.guess \
! -path scripts/config.sub ! -path scripts/install-sh \
! -path scripts/mkinstalldirs ! -path scripts/move-if-change \
! -path INSTALL ! -path locale/programs/charmap-kw.h \
! -path po/libc.pot ! -path sysdeps/gnu/errlist.c \
! '(' -name configure \
-execdir test -f configure.ac -o -f configure.in ';' ')' \
! '(' -name preconfigure \
-execdir test -f preconfigure.ac ';' ')' \
-print)
and then by running 'make dist-prepare' to regenerate files built
from the altered files, and then executing the following to cleanup:
chmod a+x sysdeps/unix/sysv/linux/riscv/configure
# Omit irrelevant whitespace and comment-only changes,
# perhaps from a slightly-different Autoconf version.
git checkout -f \
sysdeps/csky/configure \
sysdeps/hppa/configure \
sysdeps/riscv/configure \
sysdeps/unix/sysv/linux/csky/configure
# Omit changes that caused a pre-commit check to fail like this:
# remote: *** error: sysdeps/powerpc/powerpc64/ppc-mcount.S: trailing lines
git checkout -f \
sysdeps/powerpc/powerpc64/ppc-mcount.S \
sysdeps/unix/sysv/linux/s390/s390-64/syscall.S
# Omit change that caused a pre-commit check to fail like this:
# remote: *** error: sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S: last line does not end in newline
git checkout -f sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S
2019-09-07 05:40:42 +00:00
|
|
|
<https://www.gnu.org/licenses/>. */
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2014-11-24 09:33:45 +00:00
|
|
|
#if IS_IN (libc) \
|
2010-01-12 19:22:03 +00:00
|
|
|
&& (defined SHARED \
|
2010-01-12 19:28:17 +00:00
|
|
|
|| defined USE_AS_MEMMOVE \
|
2010-01-12 19:22:03 +00:00
|
|
|
|| !defined USE_MULTIARCH)
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
# include <sysdep.h>
|
|
|
|
# include "asm-syntax.h"
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
# ifndef MEMCPY
|
|
|
|
# define MEMCPY __memcpy_ssse3
|
|
|
|
# define MEMCPY_CHK __memcpy_chk_ssse3
|
|
|
|
# endif
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2022-02-10 14:20:54 +00:00
|
|
|
/* Offsets from ESP of the three stack arguments as used after ENTRANCE
   has run.  PARMS is the offset of the first argument (4 without PIC,
   8 with PIC because EBX is pushed first); each following argument is
   4 bytes further up.  Code that has pushed another register must add
   4 itself, e.g. DEST+4(%esp) after PUSH (%edi).  */
# define DEST PARMS
|
|
|
|
# define SRC DEST+4
|
|
|
|
# define LEN SRC+4
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
/* Unwind bookkeeping that matches a 4-byte push of REG: the CFA offset
   grows by 4 and REG is recorded as saved at offset 0.  The macro
   contains only cfi_* annotations (presumably assembler .cfi
   directives supplied by sysdep.h -- confirm), so it is also used on
   its own after a jump or return to re-describe the stack for the code
   that follows.  */
# define CFI_PUSH(REG) \
|
|
|
|
cfi_adjust_cfa_offset (4); \
|
2010-01-12 19:22:03 +00:00
|
|
|
cfi_rel_offset (REG, 0)
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
/* Unwind bookkeeping that matches a 4-byte pop of REG: the CFA offset
   shrinks by 4 and REG is marked as restored.  Counterpart of
   CFI_PUSH.  */
# define CFI_POP(REG) \
|
|
|
|
cfi_adjust_cfa_offset (-4); \
|
2010-01-12 19:22:03 +00:00
|
|
|
cfi_restore (REG)
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
/* Push/pop a 32-bit register and emit the matching unwind annotation in
   one step, so the instruction and its CFI description cannot drift
   apart.  */
# define PUSH(REG) pushl REG; CFI_PUSH (REG)
|
|
|
|
# define POP(REG) popl REG; CFI_POP (REG)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2017-08-02 17:25:11 +00:00
|
|
|
# ifdef PIC
|
2012-03-30 20:45:27 +00:00
|
|
|
/* PIC configuration.  EBX is overwritten by SETUP_PIC_REG and by the
   jump-table dispatch, so ENTRANCE pushes it and RETURN_END pops it
   before the ret; the extra push is why the first argument is at
   8(%esp).  RETURN additionally re-emits CFI_PUSH (%ebx) after the ret,
   presumably so that the unwind info of the code placed after the
   return still describes EBX as saved on the stack.  JMPTBL stores an
   entry as the difference I - B; BRANCH_TO_JMPTBL_ENTRY adds the table
   address back at run time (B is presumably the table label --
   confirm at the table definitions).  */
# define PARMS 8 /* Preserve EBX. */
|
|
|
|
# define ENTRANCE PUSH (%ebx);
|
|
|
|
# define RETURN_END POP (%ebx); ret
|
|
|
|
# define RETURN RETURN_END; CFI_PUSH (%ebx)
|
|
|
|
# define JMPTBL(I, B) I - B
|
2010-01-12 19:22:03 +00:00
|
|
|
|
|
|
|
/* Load an entry in a jump table into EBX and branch to it. TABLE is a
|
2012-03-30 20:45:27 +00:00
|
|
|
jump table with relative offsets. INDEX is a register that contains the
|
|
|
|
index into the jump table. SCALE is the scale of INDEX. */
|
|
|
|
|
|
|
|
/* Note: this variant overwrites EBX (it ends up holding the jump
   target) and changes the flags through the two addl instructions.
   The caller's EBX is the copy pushed by ENTRANCE.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
|
|
|
|
/* We first load PC into EBX. */ \
|
|
|
|
SETUP_PIC_REG(bx); \
|
|
|
|
/* Get the address of the jump table. */ \
|
|
|
|
addl $(TABLE - .), %ebx; \
|
|
|
|
/* Get the entry and convert the relative offset to the \
|
|
|
|
absolute address. */ \
|
|
|
|
addl (%ebx, INDEX, SCALE), %ebx; \
|
|
|
|
/* We loaded the jump table. Go. */ \
|
2018-07-18 15:00:49 +00:00
|
|
|
_CET_NOTRACK jmp *%ebx
|
2012-03-30 20:45:27 +00:00
|
|
|
# else
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
/* Non-PIC configuration.  Nothing is pushed on entry, so ENTRANCE is
   empty, the first argument sits at 4(%esp), and RETURN and RETURN_END
   are both a plain ret.  JMPTBL stores the label I itself; the base
   argument B is ignored.  */
# define PARMS 4
|
|
|
|
# define ENTRANCE
|
|
|
|
# define RETURN_END ret
|
|
|
|
# define RETURN RETURN_END
|
|
|
|
# define JMPTBL(I, B) I
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
/* Branch to an entry in a jump table. TABLE is a jump table with
|
|
|
|
absolute addresses. INDEX is a register that contains the index into the
|
|
|
|
jump table. SCALE is the scale of INDEX. */
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
/* Note: this variant jumps through the table entry in memory; it
   writes no register and executes no flag-changing instruction.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
|
2018-07-18 15:00:49 +00:00
|
|
|
_CET_NOTRACK jmp *TABLE(, INDEX, SCALE)
|
2012-03-30 20:45:27 +00:00
|
|
|
# endif
|
2010-01-12 19:22:03 +00:00
|
|
|
|
|
|
|
.section .text.ssse3,"ax",@progbits
|
2022-02-10 14:20:54 +00:00
|
|
|
# ifdef SHARED
|
2010-01-12 19:22:03 +00:00
|
|
|
/* Checking entry point.  Nothing has been pushed yet, so 12(%esp) is
   the third stack argument (the copy length) and 16(%esp) is the
   fourth -- presumably the size of the destination object, as for
   __memcpy_chk (dest, src, len, destlen); confirm against the caller.
   If the fourth argument is smaller than the length (unsigned
   compare), control goes to __chk_fail.  Otherwise there is no ret or
   jmp: execution runs off the end of this stub into MEMCPY, which is
   defined directly after it, and MEMCPY reloads EAX itself.  */
ENTRY (MEMCPY_CHK)
|
|
|
|
movl 12(%esp), %eax
|
|
|
|
cmpl %eax, 16(%esp)
|
|
|
|
jb HIDDEN_JUMPTARGET (__chk_fail)
|
|
|
|
END (MEMCPY_CHK)
|
2012-03-30 20:45:27 +00:00
|
|
|
# endif
|
2010-01-12 19:22:03 +00:00
|
|
|
ENTRY (MEMCPY)
|
|
|
|
ENTRANCE
|
|
|
|
movl LEN(%esp), %ecx
|
|
|
|
movl SRC(%esp), %eax
|
|
|
|
movl DEST(%esp), %edx
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
# ifdef USE_AS_MEMMOVE
|
2010-01-12 19:22:03 +00:00
|
|
|
/* memmove only: choose the copy direction.  Here EAX = src, EDX = dest
   and ECX = len.  All comparisons are unsigned.
     dest <  src          -> forward copy is always safe.
     dest == src          -> L(fwd_write_0bytes) (defined elsewhere;
                             presumably returns without copying).
     dest >  src, len < 32 -> L(bk_write_less32bytes_2) (defined
                             elsewhere; presumably the short backward
                             path).
     dest >  src, len >= 32 -> L(memmove_bwd) below.  */
cmp %eax, %edx
|
|
|
|
jb L(copy_forward)
|
|
|
|
je L(fwd_write_0bytes)
|
|
|
|
cmp $32, %ecx
|
2010-02-25 02:20:57 +00:00
|
|
|
jae L(memmove_bwd)
|
2010-01-12 19:22:03 +00:00
|
|
|
jmp L(bk_write_less32bytes_2)
|
2012-03-30 20:45:27 +00:00
|
|
|
|
|
|
|
.p2align 4
|
2010-01-12 19:22:03 +00:00
|
|
|
L(memmove_bwd):
|
|
|
|
/* EAX = src + len, one past the end of the source.  */
add %ecx, %eax
|
|
|
|
/* dest < src + len means the destination starts inside the source.  */
cmp %eax, %edx
|
|
|
|
/* Restore EAX = src.  movl leaves the flags alone, so the jb below
   still acts on the compare above.  */
movl SRC(%esp), %eax
|
|
|
|
/* Overlapping: copy backward.  Otherwise fall through to
   L(copy_forward).  */
jb L(copy_backward)
|
|
|
|
|
|
|
|
L(copy_forward):
|
2012-03-30 20:45:27 +00:00
|
|
|
# endif
|
2010-01-12 19:22:03 +00:00
|
|
|
cmp $48, %ecx
|
2010-02-25 02:20:57 +00:00
|
|
|
jae L(48bytesormore)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
|
|
|
L(fwd_write_less32bytes):
|
2012-03-30 20:45:27 +00:00
|
|
|
# ifndef USE_AS_MEMMOVE
|
2010-01-12 19:22:03 +00:00
|
|
|
cmp %dl, %al
|
2010-02-25 02:20:57 +00:00
|
|
|
jb L(bk_write)
|
2012-03-30 20:45:27 +00:00
|
|
|
# endif
|
2010-01-12 19:22:03 +00:00
|
|
|
add %ecx, %edx
|
|
|
|
add %ecx, %eax
|
|
|
|
BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
|
2012-03-30 20:45:27 +00:00
|
|
|
# ifndef USE_AS_MEMMOVE
|
|
|
|
.p2align 4
|
2010-01-12 19:22:03 +00:00
|
|
|
L(bk_write):
|
|
|
|
BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
|
2012-03-30 20:45:27 +00:00
|
|
|
# endif
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2010-01-12 19:22:03 +00:00
|
|
|
L(48bytesormore):
|
2012-03-30 20:45:27 +00:00
|
|
|
# ifndef USE_AS_MEMMOVE
|
|
|
|
movlpd (%eax), %xmm0
|
|
|
|
movlpd 8(%eax), %xmm1
|
|
|
|
movlpd %xmm0, (%edx)
|
|
|
|
movlpd %xmm1, 8(%edx)
|
|
|
|
# else
|
2010-01-12 19:22:03 +00:00
|
|
|
movdqu (%eax), %xmm0
|
2012-03-30 20:45:27 +00:00
|
|
|
# endif
|
2010-01-12 19:22:03 +00:00
|
|
|
/* Advance the destination to the next 16-byte boundary.  On entry
   EAX = src, EDX = dest, ECX = len, and the first 16 bytes have already
   been stored (memcpy) or loaded into XMM0 for a later store (memmove)
   by the code just above.  EDI is saved here, so from this point the
   stack arguments are 4 bytes further away (hence DEST+4(%esp) in the
   L(shl_N) code).  */
PUSH (%edi)
|
|
|
|
/* EDI = original dest.  */
movl %edx, %edi
|
|
|
|
/* EDX = (dest & -16) + 16: the first 16-byte boundary strictly above
   dest, i.e. 1..16 bytes further on.  */
and $-16, %edx
|
|
|
|
add $16, %edx
|
|
|
|
/* EDI = dest - aligned dest, a negative count of the bytes skipped.  */
sub %edx, %edi
|
|
|
|
/* len -= bytes skipped.  */
add %edi, %ecx
|
|
|
|
/* src += bytes skipped, keeping src and dest in step.  */
sub %edi, %eax
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
# ifdef SHARED_CACHE_SIZE_HALF
|
2010-01-12 19:22:03 +00:00
|
|
|
cmp $SHARED_CACHE_SIZE_HALF, %ecx
|
2012-03-30 20:45:27 +00:00
|
|
|
# else
|
2017-08-02 17:25:11 +00:00
|
|
|
# ifdef PIC
|
2012-02-03 23:22:53 +00:00
|
|
|
SETUP_PIC_REG(bx)
|
2010-01-12 19:22:03 +00:00
|
|
|
add $_GLOBAL_OFFSET_TABLE_, %ebx
|
|
|
|
cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
|
2012-03-30 20:45:27 +00:00
|
|
|
# else
|
2010-01-12 19:22:03 +00:00
|
|
|
cmp __x86_shared_cache_size_half, %ecx
|
2012-03-30 20:45:27 +00:00
|
|
|
# endif
|
2010-01-12 19:22:03 +00:00
|
|
|
# endif
|
|
|
|
|
|
|
|
mov %eax, %edi
|
2010-02-25 02:20:57 +00:00
|
|
|
jae L(large_page)
|
2010-01-12 19:22:03 +00:00
|
|
|
and $0xf, %edi
|
|
|
|
jz L(shl_0)
|
|
|
|
BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %edi, 4)
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2010-01-12 19:22:03 +00:00
|
|
|
L(shl_0):
|
2012-03-30 20:45:27 +00:00
|
|
|
# ifdef USE_AS_MEMMOVE
|
|
|
|
movl DEST+4(%esp), %edi
|
|
|
|
movdqu %xmm0, (%edi)
|
|
|
|
# endif
|
2010-01-12 19:22:03 +00:00
|
|
|
xor %edi, %edi
|
|
|
|
cmp $127, %ecx
|
|
|
|
ja L(shl_0_gobble)
|
|
|
|
lea -32(%ecx), %ecx
|
2012-03-30 20:45:27 +00:00
|
|
|
|
|
|
|
.p2align 4
|
2010-01-12 19:22:03 +00:00
|
|
|
L(shl_0_loop):
|
|
|
|
movdqa (%eax, %edi), %xmm0
|
|
|
|
movdqa 16(%eax, %edi), %xmm1
|
|
|
|
sub $32, %ecx
|
|
|
|
movdqa %xmm0, (%edx, %edi)
|
|
|
|
movdqa %xmm1, 16(%edx, %edi)
|
|
|
|
lea 32(%edi), %edi
|
2010-02-25 02:20:57 +00:00
|
|
|
jb L(shl_0_end)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
|
|
|
movdqa (%eax, %edi), %xmm0
|
|
|
|
movdqa 16(%eax, %edi), %xmm1
|
|
|
|
sub $32, %ecx
|
|
|
|
movdqa %xmm0, (%edx, %edi)
|
|
|
|
movdqa %xmm1, 16(%edx, %edi)
|
|
|
|
lea 32(%edi), %edi
|
2010-02-25 02:20:57 +00:00
|
|
|
jb L(shl_0_end)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
|
|
|
movdqa (%eax, %edi), %xmm0
|
|
|
|
movdqa 16(%eax, %edi), %xmm1
|
|
|
|
sub $32, %ecx
|
|
|
|
movdqa %xmm0, (%edx, %edi)
|
|
|
|
movdqa %xmm1, 16(%edx, %edi)
|
|
|
|
lea 32(%edi), %edi
|
2010-02-25 02:20:57 +00:00
|
|
|
jb L(shl_0_end)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
|
|
|
movdqa (%eax, %edi), %xmm0
|
|
|
|
movdqa 16(%eax, %edi), %xmm1
|
|
|
|
sub $32, %ecx
|
|
|
|
movdqa %xmm0, (%edx, %edi)
|
|
|
|
movdqa %xmm1, 16(%edx, %edi)
|
|
|
|
lea 32(%edi), %edi
|
2012-03-30 20:45:27 +00:00
|
|
|
|
2010-01-12 19:22:03 +00:00
|
|
|
L(shl_0_end):
|
|
|
|
lea 32(%ecx), %ecx
|
|
|
|
add %ecx, %edi
|
|
|
|
add %edi, %edx
|
|
|
|
add %edi, %eax
|
|
|
|
POP (%edi)
|
2011-10-23 18:28:26 +00:00
|
|
|
BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd_align), %ecx, 4)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2010-02-25 02:20:57 +00:00
|
|
|
CFI_PUSH (%edi)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
|
|
|
L(shl_0_gobble):
|
|
|
|
# ifdef DATA_CACHE_SIZE_HALF
|
2010-01-12 19:22:03 +00:00
|
|
|
cmp $DATA_CACHE_SIZE_HALF, %ecx
|
2012-03-30 20:45:27 +00:00
|
|
|
# else
|
2017-08-02 17:25:11 +00:00
|
|
|
# ifdef PIC
|
2012-02-03 23:22:53 +00:00
|
|
|
SETUP_PIC_REG(bx)
|
2010-01-12 19:22:03 +00:00
|
|
|
add $_GLOBAL_OFFSET_TABLE_, %ebx
|
|
|
|
cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
|
2012-03-30 20:45:27 +00:00
|
|
|
# else
|
2010-01-12 19:22:03 +00:00
|
|
|
cmp __x86_data_cache_size_half, %ecx
|
2012-03-30 20:45:27 +00:00
|
|
|
# endif
|
2010-01-12 19:22:03 +00:00
|
|
|
# endif
|
2012-03-30 20:45:27 +00:00
|
|
|
POP (%edi)
|
2010-01-12 19:22:03 +00:00
|
|
|
lea -128(%ecx), %ecx
|
2010-02-25 02:20:57 +00:00
|
|
|
jae L(shl_0_gobble_mem_loop)
|
2012-03-30 20:45:27 +00:00
|
|
|
|
|
|
|
.p2align 4
|
2010-01-12 19:22:03 +00:00
|
|
|
L(shl_0_gobble_cache_loop):
|
|
|
|
movdqa (%eax), %xmm0
|
|
|
|
movdqa 0x10(%eax), %xmm1
|
|
|
|
movdqa 0x20(%eax), %xmm2
|
|
|
|
movdqa 0x30(%eax), %xmm3
|
|
|
|
movdqa 0x40(%eax), %xmm4
|
|
|
|
movdqa 0x50(%eax), %xmm5
|
|
|
|
movdqa 0x60(%eax), %xmm6
|
|
|
|
movdqa 0x70(%eax), %xmm7
|
|
|
|
lea 0x80(%eax), %eax
|
|
|
|
sub $128, %ecx
|
|
|
|
movdqa %xmm0, (%edx)
|
|
|
|
movdqa %xmm1, 0x10(%edx)
|
|
|
|
movdqa %xmm2, 0x20(%edx)
|
|
|
|
movdqa %xmm3, 0x30(%edx)
|
|
|
|
movdqa %xmm4, 0x40(%edx)
|
|
|
|
movdqa %xmm5, 0x50(%edx)
|
|
|
|
movdqa %xmm6, 0x60(%edx)
|
|
|
|
movdqa %xmm7, 0x70(%edx)
|
|
|
|
lea 0x80(%edx), %edx
|
|
|
|
|
2010-02-25 02:20:57 +00:00
|
|
|
jae L(shl_0_gobble_cache_loop)
|
2010-01-12 19:22:03 +00:00
|
|
|
cmp $-0x40, %ecx
|
|
|
|
lea 0x80(%ecx), %ecx
|
|
|
|
jl L(shl_0_cache_less_64bytes)
|
|
|
|
|
|
|
|
movdqa (%eax), %xmm0
|
|
|
|
sub $0x40, %ecx
|
|
|
|
movdqa 0x10(%eax), %xmm1
|
|
|
|
movdqa %xmm0, (%edx)
|
|
|
|
movdqa %xmm1, 0x10(%edx)
|
|
|
|
movdqa 0x20(%eax), %xmm0
|
|
|
|
movdqa 0x30(%eax), %xmm1
|
|
|
|
add $0x40, %eax
|
|
|
|
movdqa %xmm0, 0x20(%edx)
|
|
|
|
movdqa %xmm1, 0x30(%edx)
|
|
|
|
add $0x40, %edx
|
2012-03-30 20:45:27 +00:00
|
|
|
|
2010-01-12 19:22:03 +00:00
|
|
|
L(shl_0_cache_less_64bytes):
|
|
|
|
cmp $0x20, %ecx
|
2010-02-25 02:20:57 +00:00
|
|
|
jb L(shl_0_cache_less_32bytes)
|
2010-01-12 19:22:03 +00:00
|
|
|
movdqa (%eax), %xmm0
|
|
|
|
sub $0x20, %ecx
|
|
|
|
movdqa 0x10(%eax), %xmm1
|
|
|
|
add $0x20, %eax
|
|
|
|
movdqa %xmm0, (%edx)
|
|
|
|
movdqa %xmm1, 0x10(%edx)
|
|
|
|
add $0x20, %edx
|
2012-03-30 20:45:27 +00:00
|
|
|
|
2010-01-12 19:22:03 +00:00
|
|
|
L(shl_0_cache_less_32bytes):
|
|
|
|
cmp $0x10, %ecx
|
2010-02-25 02:20:57 +00:00
|
|
|
jb L(shl_0_cache_less_16bytes)
|
2010-01-12 19:22:03 +00:00
|
|
|
sub $0x10, %ecx
|
|
|
|
movdqa (%eax), %xmm0
|
|
|
|
add $0x10, %eax
|
|
|
|
movdqa %xmm0, (%edx)
|
|
|
|
add $0x10, %edx
|
2012-03-30 20:45:27 +00:00
|
|
|
|
2010-01-12 19:22:03 +00:00
|
|
|
L(shl_0_cache_less_16bytes):
|
|
|
|
add %ecx, %edx
|
|
|
|
add %ecx, %eax
|
|
|
|
BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2010-01-12 19:22:03 +00:00
|
|
|
L(shl_0_gobble_mem_loop):
|
|
|
|
prefetcht0 0x1c0(%eax)
|
|
|
|
prefetcht0 0x280(%eax)
|
|
|
|
prefetcht0 0x1c0(%edx)
|
|
|
|
|
|
|
|
movdqa (%eax), %xmm0
|
|
|
|
movdqa 0x10(%eax), %xmm1
|
|
|
|
movdqa 0x20(%eax), %xmm2
|
|
|
|
movdqa 0x30(%eax), %xmm3
|
|
|
|
movdqa 0x40(%eax), %xmm4
|
|
|
|
movdqa 0x50(%eax), %xmm5
|
|
|
|
movdqa 0x60(%eax), %xmm6
|
|
|
|
movdqa 0x70(%eax), %xmm7
|
|
|
|
lea 0x80(%eax), %eax
|
|
|
|
sub $0x80, %ecx
|
|
|
|
movdqa %xmm0, (%edx)
|
|
|
|
movdqa %xmm1, 0x10(%edx)
|
|
|
|
movdqa %xmm2, 0x20(%edx)
|
|
|
|
movdqa %xmm3, 0x30(%edx)
|
|
|
|
movdqa %xmm4, 0x40(%edx)
|
|
|
|
movdqa %xmm5, 0x50(%edx)
|
|
|
|
movdqa %xmm6, 0x60(%edx)
|
|
|
|
movdqa %xmm7, 0x70(%edx)
|
|
|
|
lea 0x80(%edx), %edx
|
|
|
|
|
2010-02-25 02:20:57 +00:00
|
|
|
jae L(shl_0_gobble_mem_loop)
|
2010-01-12 19:22:03 +00:00
|
|
|
cmp $-0x40, %ecx
|
|
|
|
lea 0x80(%ecx), %ecx
|
|
|
|
jl L(shl_0_mem_less_64bytes)
|
|
|
|
|
|
|
|
movdqa (%eax), %xmm0
|
|
|
|
sub $0x40, %ecx
|
|
|
|
movdqa 0x10(%eax), %xmm1
|
|
|
|
|
|
|
|
movdqa %xmm0, (%edx)
|
|
|
|
movdqa %xmm1, 0x10(%edx)
|
|
|
|
|
|
|
|
movdqa 0x20(%eax), %xmm0
|
|
|
|
movdqa 0x30(%eax), %xmm1
|
|
|
|
add $0x40, %eax
|
|
|
|
|
|
|
|
movdqa %xmm0, 0x20(%edx)
|
|
|
|
movdqa %xmm1, 0x30(%edx)
|
|
|
|
add $0x40, %edx
|
2012-03-30 20:45:27 +00:00
|
|
|
|
2010-01-12 19:22:03 +00:00
|
|
|
L(shl_0_mem_less_64bytes):
|
|
|
|
cmp $0x20, %ecx
|
2010-02-25 02:20:57 +00:00
|
|
|
jb L(shl_0_mem_less_32bytes)
|
2010-01-12 19:22:03 +00:00
|
|
|
movdqa (%eax), %xmm0
|
|
|
|
sub $0x20, %ecx
|
|
|
|
movdqa 0x10(%eax), %xmm1
|
|
|
|
add $0x20, %eax
|
|
|
|
movdqa %xmm0, (%edx)
|
|
|
|
movdqa %xmm1, 0x10(%edx)
|
|
|
|
add $0x20, %edx
|
2012-03-30 20:45:27 +00:00
|
|
|
|
2010-01-12 19:22:03 +00:00
|
|
|
L(shl_0_mem_less_32bytes):
|
|
|
|
cmp $0x10, %ecx
|
2010-02-25 02:20:57 +00:00
|
|
|
jb L(shl_0_mem_less_16bytes)
|
2010-01-12 19:22:03 +00:00
|
|
|
sub $0x10, %ecx
|
|
|
|
movdqa (%eax), %xmm0
|
|
|
|
add $0x10, %eax
|
|
|
|
movdqa %xmm0, (%edx)
|
|
|
|
add $0x10, %edx
|
2012-03-30 20:45:27 +00:00
|
|
|
|
2010-01-12 19:22:03 +00:00
|
|
|
L(shl_0_mem_less_16bytes):
|
|
|
|
add %ecx, %edx
|
|
|
|
add %ecx, %eax
|
2011-10-23 18:28:26 +00:00
|
|
|
BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd_align), %ecx, 4)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2010-01-12 19:22:03 +00:00
|
|
|
L(shl_1):
|
2012-03-30 20:45:27 +00:00
|
|
|
# ifndef USE_AS_MEMMOVE
|
|
|
|
movaps -1(%eax), %xmm1
|
|
|
|
# else
|
|
|
|
movl DEST+4(%esp), %edi
|
|
|
|
movaps -1(%eax), %xmm1
|
|
|
|
movdqu %xmm0, (%edi)
|
|
|
|
# endif
|
|
|
|
# ifdef DATA_CACHE_SIZE_HALF
|
|
|
|
cmp $DATA_CACHE_SIZE_HALF, %ecx
|
|
|
|
# else
|
2017-08-02 17:25:11 +00:00
|
|
|
# ifdef PIC
|
2012-03-30 20:45:27 +00:00
|
|
|
SETUP_PIC_REG(bx)
|
|
|
|
add $_GLOBAL_OFFSET_TABLE_, %ebx
|
|
|
|
cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
|
|
|
|
# else
|
|
|
|
cmp __x86_data_cache_size_half, %ecx
|
|
|
|
# endif
|
|
|
|
# endif
|
|
|
|
jb L(sh_1_no_prefetch)
|
|
|
|
|
|
|
|
lea -64(%ecx), %ecx
|
|
|
|
|
|
|
|
.p2align 4
|
|
|
|
L(Shl1LoopStart):
|
|
|
|
prefetcht0 0x1c0(%eax)
|
|
|
|
prefetcht0 0x1c0(%edx)
|
|
|
|
movaps 15(%eax), %xmm2
|
|
|
|
movaps 31(%eax), %xmm3
|
|
|
|
movaps 47(%eax), %xmm4
|
|
|
|
movaps 63(%eax), %xmm5
|
|
|
|
movaps %xmm5, %xmm7
|
|
|
|
palignr $1, %xmm4, %xmm5
|
|
|
|
palignr $1, %xmm3, %xmm4
|
|
|
|
movaps %xmm5, 48(%edx)
|
|
|
|
palignr $1, %xmm2, %xmm3
|
|
|
|
lea 64(%eax), %eax
|
|
|
|
palignr $1, %xmm1, %xmm2
|
|
|
|
movaps %xmm4, 32(%edx)
|
|
|
|
movaps %xmm3, 16(%edx)
|
|
|
|
movaps %xmm7, %xmm1
|
|
|
|
movaps %xmm2, (%edx)
|
|
|
|
lea 64(%edx), %edx
|
|
|
|
sub $64, %ecx
|
|
|
|
ja L(Shl1LoopStart)
|
|
|
|
|
|
|
|
L(Shl1LoopLeave):
|
|
|
|
add $32, %ecx
|
|
|
|
jle L(shl_end_0)
|
|
|
|
|
|
|
|
movaps 15(%eax), %xmm2
|
|
|
|
movaps 31(%eax), %xmm3
|
|
|
|
palignr $1, %xmm2, %xmm3
|
|
|
|
palignr $1, %xmm1, %xmm2
|
|
|
|
movaps %xmm2, (%edx)
|
|
|
|
movaps %xmm3, 16(%edx)
|
|
|
|
lea 32(%edx, %ecx), %edx
|
|
|
|
lea 32(%eax, %ecx), %eax
|
|
|
|
POP (%edi)
|
|
|
|
BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
|
|
|
|
|
|
|
|
CFI_PUSH (%edi)
|
|
|
|
|
|
|
|
.p2align 4
|
|
|
|
L(sh_1_no_prefetch):
|
|
|
|
lea -32(%ecx), %ecx
|
2010-01-12 19:22:03 +00:00
|
|
|
lea -1(%eax), %eax
|
|
|
|
xor %edi, %edi
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
|
|
|
L(sh_1_no_prefetch_loop):
|
2010-01-12 19:22:03 +00:00
|
|
|
movdqa 16(%eax, %edi), %xmm2
|
|
|
|
sub $32, %ecx
|
|
|
|
movdqa 32(%eax, %edi), %xmm3
|
|
|
|
movdqa %xmm3, %xmm4
|
|
|
|
palignr $1, %xmm2, %xmm3
|
|
|
|
palignr $1, %xmm1, %xmm2
|
|
|
|
lea 32(%edi), %edi
|
|
|
|
movdqa %xmm2, -32(%edx, %edi)
|
|
|
|
movdqa %xmm3, -16(%edx, %edi)
|
2012-03-30 20:45:27 +00:00
|
|
|
jb L(sh_1_end_no_prefetch_loop)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
|
|
|
movdqa 16(%eax, %edi), %xmm2
|
|
|
|
sub $32, %ecx
|
|
|
|
movdqa 32(%eax, %edi), %xmm3
|
|
|
|
movdqa %xmm3, %xmm1
|
|
|
|
palignr $1, %xmm2, %xmm3
|
|
|
|
palignr $1, %xmm4, %xmm2
|
|
|
|
lea 32(%edi), %edi
|
|
|
|
movdqa %xmm2, -32(%edx, %edi)
|
|
|
|
movdqa %xmm3, -16(%edx, %edi)
|
2012-03-30 20:45:27 +00:00
|
|
|
jae L(sh_1_no_prefetch_loop)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
L(sh_1_end_no_prefetch_loop):
|
2010-01-12 19:22:03 +00:00
|
|
|
lea 32(%ecx), %ecx
|
|
|
|
add %ecx, %edi
|
|
|
|
add %edi, %edx
|
|
|
|
lea 1(%edi, %eax), %eax
|
2012-03-30 20:45:27 +00:00
|
|
|
POP (%edi)
|
|
|
|
BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
CFI_PUSH (%edi)
|
|
|
|
|
|
|
|
.p2align 4
|
2010-01-12 19:22:03 +00:00
|
|
|
L(shl_2):
|
2012-03-30 20:45:27 +00:00
|
|
|
# ifndef USE_AS_MEMMOVE
|
|
|
|
movaps -2(%eax), %xmm1
|
|
|
|
# else
|
|
|
|
movl DEST+4(%esp), %edi
|
|
|
|
movaps -2(%eax), %xmm1
|
|
|
|
movdqu %xmm0, (%edi)
|
|
|
|
# endif
|
|
|
|
# ifdef DATA_CACHE_SIZE_HALF
|
|
|
|
cmp $DATA_CACHE_SIZE_HALF, %ecx
|
|
|
|
# else
|
2017-08-02 17:25:11 +00:00
|
|
|
# ifdef PIC
|
2012-03-30 20:45:27 +00:00
|
|
|
SETUP_PIC_REG(bx)
|
|
|
|
add $_GLOBAL_OFFSET_TABLE_, %ebx
|
|
|
|
cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
|
|
|
|
# else
|
|
|
|
cmp __x86_data_cache_size_half, %ecx
|
|
|
|
# endif
|
|
|
|
# endif
|
|
|
|
jb L(sh_2_no_prefetch)
|
|
|
|
|
|
|
|
lea -64(%ecx), %ecx
|
|
|
|
|
|
|
|
.p2align 4
|
|
|
|
L(Shl2LoopStart):
|
|
|
|
prefetcht0 0x1c0(%eax)
|
|
|
|
prefetcht0 0x1c0(%edx)
|
|
|
|
movaps 14(%eax), %xmm2
|
|
|
|
movaps 30(%eax), %xmm3
|
|
|
|
movaps 46(%eax), %xmm4
|
|
|
|
movaps 62(%eax), %xmm5
|
|
|
|
movaps %xmm5, %xmm7
|
|
|
|
palignr $2, %xmm4, %xmm5
|
|
|
|
palignr $2, %xmm3, %xmm4
|
|
|
|
movaps %xmm5, 48(%edx)
|
|
|
|
palignr $2, %xmm2, %xmm3
|
|
|
|
lea 64(%eax), %eax
|
|
|
|
palignr $2, %xmm1, %xmm2
|
|
|
|
movaps %xmm4, 32(%edx)
|
|
|
|
movaps %xmm3, 16(%edx)
|
|
|
|
movaps %xmm7, %xmm1
|
|
|
|
movaps %xmm2, (%edx)
|
|
|
|
lea 64(%edx), %edx
|
|
|
|
sub $64, %ecx
|
|
|
|
ja L(Shl2LoopStart)
|
|
|
|
|
|
|
|
L(Shl2LoopLeave):
|
|
|
|
add $32, %ecx
|
|
|
|
jle L(shl_end_0)
|
|
|
|
|
|
|
|
movaps 14(%eax), %xmm2
|
|
|
|
movaps 30(%eax), %xmm3
|
|
|
|
palignr $2, %xmm2, %xmm3
|
|
|
|
palignr $2, %xmm1, %xmm2
|
|
|
|
movaps %xmm2, (%edx)
|
|
|
|
movaps %xmm3, 16(%edx)
|
|
|
|
lea 32(%edx, %ecx), %edx
|
|
|
|
lea 32(%eax, %ecx), %eax
|
|
|
|
POP (%edi)
|
|
|
|
BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
|
|
|
|
|
|
|
|
CFI_PUSH (%edi)
|
|
|
|
|
|
|
|
.p2align 4
|
|
|
|
L(sh_2_no_prefetch):
|
|
|
|
lea -32(%ecx), %ecx
|
2010-01-12 19:22:03 +00:00
|
|
|
lea -2(%eax), %eax
|
|
|
|
xor %edi, %edi
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
|
|
|
L(sh_2_no_prefetch_loop):
|
2010-01-12 19:22:03 +00:00
|
|
|
movdqa 16(%eax, %edi), %xmm2
|
|
|
|
sub $32, %ecx
|
|
|
|
movdqa 32(%eax, %edi), %xmm3
|
|
|
|
movdqa %xmm3, %xmm4
|
|
|
|
palignr $2, %xmm2, %xmm3
|
|
|
|
palignr $2, %xmm1, %xmm2
|
|
|
|
lea 32(%edi), %edi
|
|
|
|
movdqa %xmm2, -32(%edx, %edi)
|
|
|
|
movdqa %xmm3, -16(%edx, %edi)
|
2012-03-30 20:45:27 +00:00
|
|
|
jb L(sh_2_end_no_prefetch_loop)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
|
|
|
movdqa 16(%eax, %edi), %xmm2
|
|
|
|
sub $32, %ecx
|
|
|
|
movdqa 32(%eax, %edi), %xmm3
|
|
|
|
movdqa %xmm3, %xmm1
|
|
|
|
palignr $2, %xmm2, %xmm3
|
|
|
|
palignr $2, %xmm4, %xmm2
|
|
|
|
lea 32(%edi), %edi
|
|
|
|
movdqa %xmm2, -32(%edx, %edi)
|
|
|
|
movdqa %xmm3, -16(%edx, %edi)
|
2012-03-30 20:45:27 +00:00
|
|
|
jae L(sh_2_no_prefetch_loop)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
L(sh_2_end_no_prefetch_loop):
|
2010-01-12 19:22:03 +00:00
|
|
|
lea 32(%ecx), %ecx
|
|
|
|
add %ecx, %edi
|
|
|
|
add %edi, %edx
|
|
|
|
lea 2(%edi, %eax), %eax
|
2012-03-30 20:45:27 +00:00
|
|
|
POP (%edi)
|
|
|
|
BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
CFI_PUSH (%edi)
|
|
|
|
|
|
|
|
.p2align 4
|
2010-01-12 19:22:03 +00:00
|
|
|
L(shl_3):
|
2012-03-30 20:45:27 +00:00
|
|
|
# ifndef USE_AS_MEMMOVE
|
|
|
|
movaps -3(%eax), %xmm1
|
|
|
|
# else
|
|
|
|
movl DEST+4(%esp), %edi
|
|
|
|
movaps -3(%eax), %xmm1
|
|
|
|
movdqu %xmm0, (%edi)
|
|
|
|
# endif
|
|
|
|
# ifdef DATA_CACHE_SIZE_HALF
|
|
|
|
cmp $DATA_CACHE_SIZE_HALF, %ecx
|
|
|
|
# else
|
2017-08-02 17:25:11 +00:00
|
|
|
# ifdef PIC
|
2012-03-30 20:45:27 +00:00
|
|
|
SETUP_PIC_REG(bx)
|
|
|
|
add $_GLOBAL_OFFSET_TABLE_, %ebx
|
|
|
|
cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
|
|
|
|
# else
|
|
|
|
cmp __x86_data_cache_size_half, %ecx
|
|
|
|
# endif
|
|
|
|
# endif
|
|
|
|
jb L(sh_3_no_prefetch)
|
|
|
|
|
|
|
|
lea -64(%ecx), %ecx
|
|
|
|
|
|
|
|
.p2align 4
|
|
|
|
L(Shl3LoopStart):
|
|
|
|
prefetcht0 0x1c0(%eax)
|
|
|
|
prefetcht0 0x1c0(%edx)
|
|
|
|
movaps 13(%eax), %xmm2
|
|
|
|
movaps 29(%eax), %xmm3
|
|
|
|
movaps 45(%eax), %xmm4
|
|
|
|
movaps 61(%eax), %xmm5
|
|
|
|
movaps %xmm5, %xmm7
|
|
|
|
palignr $3, %xmm4, %xmm5
|
|
|
|
palignr $3, %xmm3, %xmm4
|
|
|
|
movaps %xmm5, 48(%edx)
|
|
|
|
palignr $3, %xmm2, %xmm3
|
|
|
|
lea 64(%eax), %eax
|
|
|
|
palignr $3, %xmm1, %xmm2
|
|
|
|
movaps %xmm4, 32(%edx)
|
|
|
|
movaps %xmm3, 16(%edx)
|
|
|
|
movaps %xmm7, %xmm1
|
|
|
|
movaps %xmm2, (%edx)
|
|
|
|
lea 64(%edx), %edx
|
|
|
|
sub $64, %ecx
|
|
|
|
ja L(Shl3LoopStart)
|
|
|
|
|
|
|
|
L(Shl3LoopLeave):
|
|
|
|
add $32, %ecx
|
|
|
|
jle L(shl_end_0)
|
|
|
|
|
|
|
|
movaps 13(%eax), %xmm2
|
|
|
|
movaps 29(%eax), %xmm3
|
|
|
|
palignr $3, %xmm2, %xmm3
|
|
|
|
palignr $3, %xmm1, %xmm2
|
|
|
|
movaps %xmm2, (%edx)
|
|
|
|
movaps %xmm3, 16(%edx)
|
|
|
|
lea 32(%edx, %ecx), %edx
|
|
|
|
lea 32(%eax, %ecx), %eax
|
|
|
|
POP (%edi)
|
|
|
|
BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
|
|
|
|
|
|
|
|
CFI_PUSH (%edi)
|
|
|
|
|
|
|
|
.p2align 4
|
|
|
|
L(sh_3_no_prefetch):
|
|
|
|
lea -32(%ecx), %ecx
|
2010-01-12 19:22:03 +00:00
|
|
|
lea -3(%eax), %eax
|
|
|
|
xor %edi, %edi
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
|
|
|
L(sh_3_no_prefetch_loop):
|
2010-01-12 19:22:03 +00:00
|
|
|
movdqa 16(%eax, %edi), %xmm2
|
|
|
|
sub $32, %ecx
|
|
|
|
movdqa 32(%eax, %edi), %xmm3
|
|
|
|
movdqa %xmm3, %xmm4
|
|
|
|
palignr $3, %xmm2, %xmm3
|
|
|
|
palignr $3, %xmm1, %xmm2
|
|
|
|
lea 32(%edi), %edi
|
|
|
|
movdqa %xmm2, -32(%edx, %edi)
|
|
|
|
movdqa %xmm3, -16(%edx, %edi)
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
jb L(sh_3_end_no_prefetch_loop)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
|
|
|
movdqa 16(%eax, %edi), %xmm2
|
|
|
|
sub $32, %ecx
|
|
|
|
movdqa 32(%eax, %edi), %xmm3
|
|
|
|
movdqa %xmm3, %xmm1
|
|
|
|
palignr $3, %xmm2, %xmm3
|
|
|
|
palignr $3, %xmm4, %xmm2
|
|
|
|
lea 32(%edi), %edi
|
|
|
|
movdqa %xmm2, -32(%edx, %edi)
|
|
|
|
movdqa %xmm3, -16(%edx, %edi)
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
jae L(sh_3_no_prefetch_loop)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
L(sh_3_end_no_prefetch_loop):
|
2010-01-12 19:22:03 +00:00
|
|
|
lea 32(%ecx), %ecx
|
|
|
|
add %ecx, %edi
|
|
|
|
add %edi, %edx
|
|
|
|
lea 3(%edi, %eax), %eax
|
2012-03-30 20:45:27 +00:00
|
|
|
POP (%edi)
|
|
|
|
BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
CFI_PUSH (%edi)
|
|
|
|
|
|
|
|
.p2align 4
|
2010-01-12 19:22:03 +00:00
|
|
|
L(shl_4):
|
2012-03-30 20:45:27 +00:00
|
|
|
# ifndef USE_AS_MEMMOVE
|
|
|
|
movaps -4(%eax), %xmm1
|
|
|
|
# else
|
|
|
|
movl DEST+4(%esp), %edi
|
|
|
|
movaps -4(%eax), %xmm1
|
|
|
|
movdqu %xmm0, (%edi)
|
|
|
|
# endif
|
|
|
|
# ifdef DATA_CACHE_SIZE_HALF
|
|
|
|
cmp $DATA_CACHE_SIZE_HALF, %ecx
|
|
|
|
# else
|
2017-08-02 17:25:11 +00:00
|
|
|
# ifdef PIC
|
2012-03-30 20:45:27 +00:00
|
|
|
SETUP_PIC_REG(bx)
|
|
|
|
add $_GLOBAL_OFFSET_TABLE_, %ebx
|
|
|
|
cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
|
|
|
|
# else
|
|
|
|
cmp __x86_data_cache_size_half, %ecx
|
|
|
|
# endif
|
|
|
|
# endif
|
|
|
|
jb L(sh_4_no_prefetch)
|
|
|
|
|
|
|
|
lea -64(%ecx), %ecx
|
|
|
|
|
|
|
|
.p2align 4
|
|
|
|
L(Shl4LoopStart):
|
|
|
|
prefetcht0 0x1c0(%eax)
|
|
|
|
prefetcht0 0x1c0(%edx)
|
|
|
|
movaps 12(%eax), %xmm2
|
|
|
|
movaps 28(%eax), %xmm3
|
|
|
|
movaps 44(%eax), %xmm4
|
|
|
|
movaps 60(%eax), %xmm5
|
|
|
|
movaps %xmm5, %xmm7
|
|
|
|
palignr $4, %xmm4, %xmm5
|
|
|
|
palignr $4, %xmm3, %xmm4
|
|
|
|
movaps %xmm5, 48(%edx)
|
|
|
|
palignr $4, %xmm2, %xmm3
|
|
|
|
lea 64(%eax), %eax
|
|
|
|
palignr $4, %xmm1, %xmm2
|
|
|
|
movaps %xmm4, 32(%edx)
|
|
|
|
movaps %xmm3, 16(%edx)
|
|
|
|
movaps %xmm7, %xmm1
|
|
|
|
movaps %xmm2, (%edx)
|
|
|
|
lea 64(%edx), %edx
|
|
|
|
sub $64, %ecx
|
|
|
|
ja L(Shl4LoopStart)
|
|
|
|
|
|
|
|
L(Shl4LoopLeave):
|
|
|
|
add $32, %ecx
|
|
|
|
jle L(shl_end_0)
|
|
|
|
|
|
|
|
movaps 12(%eax), %xmm2
|
|
|
|
movaps 28(%eax), %xmm3
|
|
|
|
palignr $4, %xmm2, %xmm3
|
|
|
|
palignr $4, %xmm1, %xmm2
|
|
|
|
movaps %xmm2, (%edx)
|
|
|
|
movaps %xmm3, 16(%edx)
|
|
|
|
lea 32(%edx, %ecx), %edx
|
|
|
|
lea 32(%eax, %ecx), %eax
|
|
|
|
POP (%edi)
|
|
|
|
BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
|
|
|
|
|
|
|
|
CFI_PUSH (%edi)
|
|
|
|
|
|
|
|
.p2align 4
|
|
|
|
L(sh_4_no_prefetch):
|
|
|
|
lea -32(%ecx), %ecx
|
2010-01-12 19:22:03 +00:00
|
|
|
lea -4(%eax), %eax
|
|
|
|
xor %edi, %edi
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
|
|
|
L(sh_4_no_prefetch_loop):
|
2010-01-12 19:22:03 +00:00
|
|
|
movdqa 16(%eax, %edi), %xmm2
|
|
|
|
sub $32, %ecx
|
|
|
|
movdqa 32(%eax, %edi), %xmm3
|
|
|
|
movdqa %xmm3, %xmm4
|
|
|
|
palignr $4, %xmm2, %xmm3
|
|
|
|
palignr $4, %xmm1, %xmm2
|
|
|
|
lea 32(%edi), %edi
|
|
|
|
movdqa %xmm2, -32(%edx, %edi)
|
|
|
|
movdqa %xmm3, -16(%edx, %edi)
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
jb L(sh_4_end_no_prefetch_loop)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
|
|
|
movdqa 16(%eax, %edi), %xmm2
|
|
|
|
sub $32, %ecx
|
|
|
|
movdqa 32(%eax, %edi), %xmm3
|
|
|
|
movdqa %xmm3, %xmm1
|
|
|
|
palignr $4, %xmm2, %xmm3
|
|
|
|
palignr $4, %xmm4, %xmm2
|
|
|
|
lea 32(%edi), %edi
|
|
|
|
movdqa %xmm2, -32(%edx, %edi)
|
|
|
|
movdqa %xmm3, -16(%edx, %edi)
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
jae L(sh_4_no_prefetch_loop)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
L(sh_4_end_no_prefetch_loop):
|
2010-01-12 19:22:03 +00:00
|
|
|
lea 32(%ecx), %ecx
|
|
|
|
add %ecx, %edi
|
|
|
|
add %edi, %edx
|
|
|
|
lea 4(%edi, %eax), %eax
|
2012-03-30 20:45:27 +00:00
|
|
|
POP (%edi)
|
|
|
|
BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
CFI_PUSH (%edi)
|
|
|
|
|
|
|
|
.p2align 4
|
2010-01-12 19:22:03 +00:00
|
|
|
L(shl_5):
|
2012-03-30 20:45:27 +00:00
|
|
|
# ifndef USE_AS_MEMMOVE
|
|
|
|
movaps -5(%eax), %xmm1
|
|
|
|
# else
|
|
|
|
movl DEST+4(%esp), %edi
|
|
|
|
movaps -5(%eax), %xmm1
|
|
|
|
movdqu %xmm0, (%edi)
|
|
|
|
# endif
|
|
|
|
# ifdef DATA_CACHE_SIZE_HALF
|
|
|
|
cmp $DATA_CACHE_SIZE_HALF, %ecx
|
|
|
|
# else
|
2017-08-02 17:25:11 +00:00
|
|
|
# ifdef PIC
|
2012-03-30 20:45:27 +00:00
|
|
|
SETUP_PIC_REG(bx)
|
|
|
|
add $_GLOBAL_OFFSET_TABLE_, %ebx
|
|
|
|
cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
|
|
|
|
# else
|
|
|
|
cmp __x86_data_cache_size_half, %ecx
|
|
|
|
# endif
|
|
|
|
# endif
|
|
|
|
jb L(sh_5_no_prefetch)
|
|
|
|
|
|
|
|
lea -64(%ecx), %ecx
|
|
|
|
|
|
|
|
.p2align 4
|
|
|
|
L(Shl5LoopStart):
|
|
|
|
prefetcht0 0x1c0(%eax)
|
|
|
|
prefetcht0 0x1c0(%edx)
|
|
|
|
movaps 11(%eax), %xmm2
|
|
|
|
movaps 27(%eax), %xmm3
|
|
|
|
movaps 43(%eax), %xmm4
|
|
|
|
movaps 59(%eax), %xmm5
|
|
|
|
movaps %xmm5, %xmm7
|
|
|
|
palignr $5, %xmm4, %xmm5
|
|
|
|
palignr $5, %xmm3, %xmm4
|
|
|
|
movaps %xmm5, 48(%edx)
|
|
|
|
palignr $5, %xmm2, %xmm3
|
|
|
|
lea 64(%eax), %eax
|
|
|
|
palignr $5, %xmm1, %xmm2
|
|
|
|
movaps %xmm4, 32(%edx)
|
|
|
|
movaps %xmm3, 16(%edx)
|
|
|
|
movaps %xmm7, %xmm1
|
|
|
|
movaps %xmm2, (%edx)
|
|
|
|
lea 64(%edx), %edx
|
|
|
|
sub $64, %ecx
|
|
|
|
ja L(Shl5LoopStart)
|
|
|
|
|
|
|
|
L(Shl5LoopLeave):
|
|
|
|
add $32, %ecx
|
|
|
|
jle L(shl_end_0)
|
|
|
|
|
|
|
|
movaps 11(%eax), %xmm2
|
|
|
|
movaps 27(%eax), %xmm3
|
|
|
|
palignr $5, %xmm2, %xmm3
|
|
|
|
palignr $5, %xmm1, %xmm2
|
|
|
|
movaps %xmm2, (%edx)
|
|
|
|
movaps %xmm3, 16(%edx)
|
|
|
|
lea 32(%edx, %ecx), %edx
|
|
|
|
lea 32(%eax, %ecx), %eax
|
|
|
|
POP (%edi)
|
|
|
|
BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
|
|
|
|
|
|
|
|
CFI_PUSH (%edi)
|
|
|
|
|
|
|
|
.p2align 4
|
|
|
|
L(sh_5_no_prefetch):
|
|
|
|
lea -32(%ecx), %ecx
|
2010-01-12 19:22:03 +00:00
|
|
|
lea -5(%eax), %eax
|
|
|
|
xor %edi, %edi
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
|
|
|
L(sh_5_no_prefetch_loop):
|
2010-01-12 19:22:03 +00:00
|
|
|
movdqa 16(%eax, %edi), %xmm2
|
|
|
|
sub $32, %ecx
|
|
|
|
movdqa 32(%eax, %edi), %xmm3
|
|
|
|
movdqa %xmm3, %xmm4
|
|
|
|
palignr $5, %xmm2, %xmm3
|
|
|
|
palignr $5, %xmm1, %xmm2
|
|
|
|
lea 32(%edi), %edi
|
|
|
|
movdqa %xmm2, -32(%edx, %edi)
|
|
|
|
movdqa %xmm3, -16(%edx, %edi)
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
jb L(sh_5_end_no_prefetch_loop)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
|
|
|
movdqa 16(%eax, %edi), %xmm2
|
|
|
|
sub $32, %ecx
|
|
|
|
movdqa 32(%eax, %edi), %xmm3
|
|
|
|
movdqa %xmm3, %xmm1
|
|
|
|
palignr $5, %xmm2, %xmm3
|
|
|
|
palignr $5, %xmm4, %xmm2
|
|
|
|
lea 32(%edi), %edi
|
|
|
|
movdqa %xmm2, -32(%edx, %edi)
|
|
|
|
movdqa %xmm3, -16(%edx, %edi)
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
jae L(sh_5_no_prefetch_loop)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
L(sh_5_end_no_prefetch_loop):
|
2010-01-12 19:22:03 +00:00
|
|
|
lea 32(%ecx), %ecx
|
|
|
|
add %ecx, %edi
|
|
|
|
add %edi, %edx
|
|
|
|
lea 5(%edi, %eax), %eax
|
2012-03-30 20:45:27 +00:00
|
|
|
POP (%edi)
|
|
|
|
BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
CFI_PUSH (%edi)
|
|
|
|
|
|
|
|
/* L(shl_6): forward copy for a source cursor (%eax) that lies 6 bytes
   past a 16-byte boundary.  Every source access below is an aligned
   16-byte load (movaps/movdqa fault on unaligned addresses); adjacent
   chunks are stitched together with palignr $6 and written with aligned
   16-byte stores through %edx.
   Registers as used in this block:
     %eax   source cursor		%edx   destination cursor
     %ecx   remaining byte count (biased while a loop is running)
     %xmm1  aligned source chunk carried over from the previous step
     %edi   scratch / running offset; released with POP (%edi) on exit.  */
	.p2align 4
L(shl_6):
# ifndef USE_AS_MEMMOVE
	/* Prime the carry with the aligned chunk holding the first bytes.  */
	movaps	-6(%eax), %xmm1
# else
	/* NOTE(review): DEST is an offset macro defined elsewhere; the +4
	   presumably skips a register pushed on the stack -- confirm.  */
	movl	DEST+4(%esp), %edi
	movaps	-6(%eax), %xmm1
	/* NOTE(review): %xmm0 is loaded before this label; presumably it
	   holds the leading 16 source bytes, stored only now so that an
	   overlapping destination cannot clobber them -- confirm against
	   the entry code.  */
	movdqu	%xmm0, (%edi)
# endif
	/* Counts below half the data cache size (unsigned compare) take the
	   loop without prefetch instructions.  */
# ifdef DATA_CACHE_SIZE_HALF
	cmp	$DATA_CACHE_SIZE_HALF, %ecx
# else
#  ifdef PIC
	SETUP_PIC_REG(bx)
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
#  else
	cmp	__x86_data_cache_size_half, %ecx
#  endif
# endif
	jb	L(sh_6_no_prefetch)

	/* Bias the count by one 64-byte iteration so the loop can branch on
	   the flags of its own subtraction.  */
	lea	-64(%ecx), %ecx

	.p2align 4
L(Shl6LoopStart):
	/* 64 bytes per iteration; prefetch 0x1c0 bytes ahead on both the
	   source and the destination stream.  */
	prefetcht0 0x1c0(%eax)
	prefetcht0 0x1c0(%edx)
	movaps	10(%eax), %xmm2
	movaps	26(%eax), %xmm3
	movaps	42(%eax), %xmm4
	movaps	58(%eax), %xmm5
	/* Keep an unmerged copy of the top chunk: it is the next carry.  */
	movaps	%xmm5, %xmm7
	/* Merge from the top chunk downwards so that every lower chunk is
	   still unmodified when it is used as the shift-in operand.  */
	palignr	$6, %xmm4, %xmm5
	palignr	$6, %xmm3, %xmm4
	movaps	%xmm5, 48(%edx)
	palignr	$6, %xmm2, %xmm3
	lea	64(%eax), %eax
	palignr	$6, %xmm1, %xmm2
	movaps	%xmm4, 32(%edx)
	movaps	%xmm3, 16(%edx)
	movaps	%xmm7, %xmm1
	movaps	%xmm2, (%edx)
	lea	64(%edx), %edx
	sub	$64, %ecx
	/* Repeat while the subtraction neither borrowed nor reached zero.  */
	ja	L(Shl6LoopStart)

L(Shl6LoopLeave):
	/* Give back 32 of the 64-byte bias.  A result <= 0 (signed) is
	   handed to L(shl_end_0), which is defined outside this block.  */
	add	$32, %ecx
	jle	L(shl_end_0)

	/* More than 32 bytes are left: copy one further 32-byte piece.  */
	movaps	10(%eax), %xmm2
	movaps	26(%eax), %xmm3
	palignr	$6, %xmm2, %xmm3
	palignr	$6, %xmm1, %xmm2

	movaps	%xmm2, (%edx)
	movaps	%xmm3, 16(%edx)
	/* Step both cursors over these 32 bytes plus the %ecx bytes still
	   outstanding, then dispatch on %ecx.
	   NOTE(review): POP and BRANCH_TO_JMPTBL_ENTRY are macros defined
	   elsewhere; presumably the table entries copy the final %ecx bytes
	   that end at the advanced cursors -- confirm.  */
	lea	32(%edx, %ecx), %edx
	lea	32(%eax, %ecx), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)

	/* NOTE(review): CFI_PUSH is defined elsewhere; presumably it
	   re-declares %edi as pushed for the unwind info, because the code
	   below is entered with %edi still on the stack -- confirm.  */
	CFI_PUSH (%edi)

	.p2align 4
L(sh_6_no_prefetch):
	/* Bias the count by one 32-byte step.  */
	lea	-32(%ecx), %ecx
	/* %eax now addresses the aligned chunk already held in %xmm1.  */
	lea	-6(%eax), %eax
	/* %edi is the running byte offset applied to both cursors.  */
	xor	%edi, %edi

	.p2align 4
L(sh_6_no_prefetch_loop):
	/* Two 32-byte steps per trip; the carry chunk alternates between
	   %xmm1 and %xmm4.  lea, movdqa and palignr leave EFLAGS untouched,
	   so each branch still tests the flags of the preceding sub.  */
	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm4
	palignr	$6, %xmm2, %xmm3
	palignr	$6, %xmm1, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	/* Borrow: fewer than 32 bytes remain after this step.  */
	jb	L(sh_6_end_no_prefetch_loop)

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm1
	palignr	$6, %xmm2, %xmm3
	palignr	$6, %xmm4, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	jae	L(sh_6_no_prefetch_loop)

L(sh_6_end_no_prefetch_loop):
	/* Remove the bias, then move both cursors past the bytes copied so
	   far plus the %ecx bytes still outstanding; the source cursor also
	   gets its 6-byte skew back.  */
	lea	32(%ecx), %ecx
	add	%ecx, %edi
	add	%edi, %edx
	lea	6(%edi, %eax), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)

	/* NOTE(review): presumably restores the unwind state (%edi pushed)
	   for the code that follows -- confirm.  */
	CFI_PUSH (%edi)
|
|
|
|
|
|
|
|
/* L(shl_7): forward copy for a source cursor (%eax) that lies 7 bytes
   past a 16-byte boundary.  Every source access below is an aligned
   16-byte load (movaps/movdqa fault on unaligned addresses); adjacent
   chunks are stitched together with palignr $7 and written with aligned
   16-byte stores through %edx.
   Registers as used in this block:
     %eax   source cursor		%edx   destination cursor
     %ecx   remaining byte count (biased while a loop is running)
     %xmm1  aligned source chunk carried over from the previous step
     %edi   scratch / running offset; released with POP (%edi) on exit.  */
	.p2align 4
L(shl_7):
# ifndef USE_AS_MEMMOVE
	/* Prime the carry with the aligned chunk holding the first bytes.  */
	movaps	-7(%eax), %xmm1
# else
	/* NOTE(review): DEST is an offset macro defined elsewhere; the +4
	   presumably skips a register pushed on the stack -- confirm.  */
	movl	DEST+4(%esp), %edi
	movaps	-7(%eax), %xmm1
	/* NOTE(review): %xmm0 is loaded before this label; presumably it
	   holds the leading 16 source bytes, stored only now so that an
	   overlapping destination cannot clobber them -- confirm against
	   the entry code.  */
	movdqu	%xmm0, (%edi)
# endif
	/* Counts below half the data cache size (unsigned compare) take the
	   loop without prefetch instructions.  */
# ifdef DATA_CACHE_SIZE_HALF
	cmp	$DATA_CACHE_SIZE_HALF, %ecx
# else
#  ifdef PIC
	SETUP_PIC_REG(bx)
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
#  else
	cmp	__x86_data_cache_size_half, %ecx
#  endif
# endif
	jb	L(sh_7_no_prefetch)

	/* Bias the count by one 64-byte iteration so the loop can branch on
	   the flags of its own subtraction.  */
	lea	-64(%ecx), %ecx

	.p2align 4
L(Shl7LoopStart):
	/* 64 bytes per iteration; prefetch 0x1c0 bytes ahead on both the
	   source and the destination stream.  */
	prefetcht0 0x1c0(%eax)
	prefetcht0 0x1c0(%edx)
	movaps	9(%eax), %xmm2
	movaps	25(%eax), %xmm3
	movaps	41(%eax), %xmm4
	movaps	57(%eax), %xmm5
	/* Keep an unmerged copy of the top chunk: it is the next carry.  */
	movaps	%xmm5, %xmm7
	/* Merge from the top chunk downwards so that every lower chunk is
	   still unmodified when it is used as the shift-in operand.  */
	palignr	$7, %xmm4, %xmm5
	palignr	$7, %xmm3, %xmm4
	movaps	%xmm5, 48(%edx)
	palignr	$7, %xmm2, %xmm3
	lea	64(%eax), %eax
	palignr	$7, %xmm1, %xmm2
	movaps	%xmm4, 32(%edx)
	movaps	%xmm3, 16(%edx)
	movaps	%xmm7, %xmm1
	movaps	%xmm2, (%edx)
	lea	64(%edx), %edx
	sub	$64, %ecx
	/* Repeat while the subtraction neither borrowed nor reached zero.  */
	ja	L(Shl7LoopStart)

L(Shl7LoopLeave):
	/* Give back 32 of the 64-byte bias.  A result <= 0 (signed) is
	   handed to L(shl_end_0), which is defined outside this block.  */
	add	$32, %ecx
	jle	L(shl_end_0)

	/* More than 32 bytes are left: copy one further 32-byte piece.  */
	movaps	9(%eax), %xmm2
	movaps	25(%eax), %xmm3
	palignr	$7, %xmm2, %xmm3
	palignr	$7, %xmm1, %xmm2

	movaps	%xmm2, (%edx)
	movaps	%xmm3, 16(%edx)
	/* Step both cursors over these 32 bytes plus the %ecx bytes still
	   outstanding, then dispatch on %ecx.
	   NOTE(review): POP and BRANCH_TO_JMPTBL_ENTRY are macros defined
	   elsewhere; presumably the table entries copy the final %ecx bytes
	   that end at the advanced cursors -- confirm.  */
	lea	32(%edx, %ecx), %edx
	lea	32(%eax, %ecx), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)

	/* NOTE(review): CFI_PUSH is defined elsewhere; presumably it
	   re-declares %edi as pushed for the unwind info, because the code
	   below is entered with %edi still on the stack -- confirm.  */
	CFI_PUSH (%edi)

	.p2align 4
L(sh_7_no_prefetch):
	/* Bias the count by one 32-byte step.  */
	lea	-32(%ecx), %ecx
	/* %eax now addresses the aligned chunk already held in %xmm1.  */
	lea	-7(%eax), %eax
	/* %edi is the running byte offset applied to both cursors.  */
	xor	%edi, %edi

	.p2align 4
L(sh_7_no_prefetch_loop):
	/* Two 32-byte steps per trip; the carry chunk alternates between
	   %xmm1 and %xmm4.  lea, movdqa and palignr leave EFLAGS untouched,
	   so each branch still tests the flags of the preceding sub.  */
	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm4
	palignr	$7, %xmm2, %xmm3
	palignr	$7, %xmm1, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)
	/* Borrow: fewer than 32 bytes remain after this step.  */
	jb	L(sh_7_end_no_prefetch_loop)

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm1
	palignr	$7, %xmm2, %xmm3
	palignr	$7, %xmm4, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)
	jae	L(sh_7_no_prefetch_loop)

L(sh_7_end_no_prefetch_loop):
	/* Remove the bias, then move both cursors past the bytes copied so
	   far plus the %ecx bytes still outstanding; the source cursor also
	   gets its 7-byte skew back.  */
	lea	32(%ecx), %ecx
	add	%ecx, %edi
	add	%edi, %edx
	lea	7(%edi, %eax), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)

	/* NOTE(review): presumably restores the unwind state (%edi pushed)
	   for the code that follows -- confirm.  */
	CFI_PUSH (%edi)
|
|
|
|
|
|
|
|
/* L(shl_8): forward copy for a source cursor (%eax) that lies 8 bytes
   past a 16-byte boundary.  Every source access below is an aligned
   16-byte load (movaps/movdqa fault on unaligned addresses); adjacent
   chunks are stitched together with palignr $8 and written with aligned
   16-byte stores through %edx.
   Registers as used in this block:
     %eax   source cursor		%edx   destination cursor
     %ecx   remaining byte count (biased while a loop is running)
     %xmm1  aligned source chunk carried over from the previous step
     %edi   scratch / running offset; released with POP (%edi) on exit.  */
	.p2align 4
L(shl_8):
# ifndef USE_AS_MEMMOVE
	/* Prime the carry with the aligned chunk holding the first bytes.  */
	movaps	-8(%eax), %xmm1
# else
	/* NOTE(review): DEST is an offset macro defined elsewhere; the +4
	   presumably skips a register pushed on the stack -- confirm.  */
	movl	DEST+4(%esp), %edi
	movaps	-8(%eax), %xmm1
	/* NOTE(review): %xmm0 is loaded before this label; presumably it
	   holds the leading 16 source bytes, stored only now so that an
	   overlapping destination cannot clobber them -- confirm against
	   the entry code.  */
	movdqu	%xmm0, (%edi)
# endif
	/* Counts below half the data cache size (unsigned compare) take the
	   loop without prefetch instructions.  */
# ifdef DATA_CACHE_SIZE_HALF
	cmp	$DATA_CACHE_SIZE_HALF, %ecx
# else
#  ifdef PIC
	SETUP_PIC_REG(bx)
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
#  else
	cmp	__x86_data_cache_size_half, %ecx
#  endif
# endif
	jb	L(sh_8_no_prefetch)

	/* Bias the count by one 64-byte iteration so the loop can branch on
	   the flags of its own subtraction.  */
	lea	-64(%ecx), %ecx

	.p2align 4
L(Shl8LoopStart):
	/* 64 bytes per iteration; prefetch 0x1c0 bytes ahead on both the
	   source and the destination stream.  */
	prefetcht0 0x1c0(%eax)
	prefetcht0 0x1c0(%edx)
	movaps	8(%eax), %xmm2
	movaps	24(%eax), %xmm3
	movaps	40(%eax), %xmm4
	movaps	56(%eax), %xmm5
	/* Keep an unmerged copy of the top chunk: it is the next carry.  */
	movaps	%xmm5, %xmm7
	/* Merge from the top chunk downwards so that every lower chunk is
	   still unmodified when it is used as the shift-in operand.  */
	palignr	$8, %xmm4, %xmm5
	palignr	$8, %xmm3, %xmm4
	movaps	%xmm5, 48(%edx)
	palignr	$8, %xmm2, %xmm3
	lea	64(%eax), %eax
	palignr	$8, %xmm1, %xmm2
	movaps	%xmm4, 32(%edx)
	movaps	%xmm3, 16(%edx)
	movaps	%xmm7, %xmm1
	movaps	%xmm2, (%edx)
	lea	64(%edx), %edx
	sub	$64, %ecx
	/* Repeat while the subtraction neither borrowed nor reached zero.  */
	ja	L(Shl8LoopStart)

/* This label does not follow the L(ShlNLoopLeave) pattern used by the
   neighbouring shift routines.  The name is kept unchanged because
   references from outside this block cannot be ruled out here.  */
L(LoopLeave8):
	/* Give back 32 of the 64-byte bias.  A result <= 0 (signed) is
	   handed to L(shl_end_0), which is defined outside this block.  */
	add	$32, %ecx
	jle	L(shl_end_0)

	/* More than 32 bytes are left: copy one further 32-byte piece.  */
	movaps	8(%eax), %xmm2
	movaps	24(%eax), %xmm3
	palignr	$8, %xmm2, %xmm3
	palignr	$8, %xmm1, %xmm2

	movaps	%xmm2, (%edx)
	movaps	%xmm3, 16(%edx)
	/* Step both cursors over these 32 bytes plus the %ecx bytes still
	   outstanding, then dispatch on %ecx.
	   NOTE(review): POP and BRANCH_TO_JMPTBL_ENTRY are macros defined
	   elsewhere; presumably the table entries copy the final %ecx bytes
	   that end at the advanced cursors -- confirm.  */
	lea	32(%edx, %ecx), %edx
	lea	32(%eax, %ecx), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)

	/* NOTE(review): CFI_PUSH is defined elsewhere; presumably it
	   re-declares %edi as pushed for the unwind info, because the code
	   below is entered with %edi still on the stack -- confirm.  */
	CFI_PUSH (%edi)

	.p2align 4
L(sh_8_no_prefetch):
	/* Bias the count by one 32-byte step.  */
	lea	-32(%ecx), %ecx
	/* %eax now addresses the aligned chunk already held in %xmm1.  */
	lea	-8(%eax), %eax
	/* %edi is the running byte offset applied to both cursors.  */
	xor	%edi, %edi

	.p2align 4
L(sh_8_no_prefetch_loop):
	/* Two 32-byte steps per trip; the carry chunk alternates between
	   %xmm1 and %xmm4.  lea, movdqa and palignr leave EFLAGS untouched,
	   so each branch still tests the flags of the preceding sub.  */
	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm4
	palignr	$8, %xmm2, %xmm3
	palignr	$8, %xmm1, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)
	/* Borrow: fewer than 32 bytes remain after this step.  */
	jb	L(sh_8_end_no_prefetch_loop)

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm1
	palignr	$8, %xmm2, %xmm3
	palignr	$8, %xmm4, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)
	jae	L(sh_8_no_prefetch_loop)

L(sh_8_end_no_prefetch_loop):
	/* Remove the bias, then move both cursors past the bytes copied so
	   far plus the %ecx bytes still outstanding; the source cursor also
	   gets its 8-byte skew back.  */
	lea	32(%ecx), %ecx
	add	%ecx, %edi
	add	%edi, %edx
	lea	8(%edi, %eax), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)

	/* NOTE(review): presumably restores the unwind state (%edi pushed)
	   for the code that follows -- confirm.  */
	CFI_PUSH (%edi)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
/* L(shl_9): forward copy for a source cursor (%eax) that lies 9 bytes
   past a 16-byte boundary.  Every source access below is an aligned
   16-byte load (movaps/movdqa fault on unaligned addresses); adjacent
   chunks are stitched together with palignr $9 and written with aligned
   16-byte stores through %edx.
   Registers as used in this block:
     %eax   source cursor		%edx   destination cursor
     %ecx   remaining byte count (biased while a loop is running)
     %xmm1  aligned source chunk carried over from the previous step
     %edi   scratch / running offset; released with POP (%edi) on exit.  */
	.p2align 4
L(shl_9):
# ifndef USE_AS_MEMMOVE
	/* Prime the carry with the aligned chunk holding the first bytes.  */
	movaps	-9(%eax), %xmm1
# else
	/* NOTE(review): DEST is an offset macro defined elsewhere; the +4
	   presumably skips a register pushed on the stack -- confirm.  */
	movl	DEST+4(%esp), %edi
	movaps	-9(%eax), %xmm1
	/* NOTE(review): %xmm0 is loaded before this label; presumably it
	   holds the leading 16 source bytes, stored only now so that an
	   overlapping destination cannot clobber them -- confirm against
	   the entry code.  */
	movdqu	%xmm0, (%edi)
# endif
	/* Counts below half the data cache size (unsigned compare) take the
	   loop without prefetch instructions.  */
# ifdef DATA_CACHE_SIZE_HALF
	cmp	$DATA_CACHE_SIZE_HALF, %ecx
# else
#  ifdef PIC
	SETUP_PIC_REG(bx)
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
#  else
	cmp	__x86_data_cache_size_half, %ecx
#  endif
# endif
	jb	L(sh_9_no_prefetch)

	/* Bias the count by one 64-byte iteration so the loop can branch on
	   the flags of its own subtraction.  */
	lea	-64(%ecx), %ecx

	.p2align 4
L(Shl9LoopStart):
	/* 64 bytes per iteration; prefetch 0x1c0 bytes ahead on both the
	   source and the destination stream.  */
	prefetcht0 0x1c0(%eax)
	prefetcht0 0x1c0(%edx)
	movaps	7(%eax), %xmm2
	movaps	23(%eax), %xmm3
	movaps	39(%eax), %xmm4
	movaps	55(%eax), %xmm5
	/* Keep an unmerged copy of the top chunk: it is the next carry.  */
	movaps	%xmm5, %xmm7
	/* Merge from the top chunk downwards so that every lower chunk is
	   still unmodified when it is used as the shift-in operand.  */
	palignr	$9, %xmm4, %xmm5
	palignr	$9, %xmm3, %xmm4
	movaps	%xmm5, 48(%edx)
	palignr	$9, %xmm2, %xmm3
	lea	64(%eax), %eax
	palignr	$9, %xmm1, %xmm2
	movaps	%xmm4, 32(%edx)
	movaps	%xmm3, 16(%edx)
	movaps	%xmm7, %xmm1
	movaps	%xmm2, (%edx)
	lea	64(%edx), %edx
	sub	$64, %ecx
	/* Repeat while the subtraction neither borrowed nor reached zero.  */
	ja	L(Shl9LoopStart)

L(Shl9LoopLeave):
	/* Give back 32 of the 64-byte bias.  A result <= 0 (signed) is
	   handed to L(shl_end_0), which is defined outside this block.  */
	add	$32, %ecx
	jle	L(shl_end_0)

	/* More than 32 bytes are left: copy one further 32-byte piece.  */
	movaps	7(%eax), %xmm2
	movaps	23(%eax), %xmm3
	palignr	$9, %xmm2, %xmm3
	palignr	$9, %xmm1, %xmm2

	movaps	%xmm2, (%edx)
	movaps	%xmm3, 16(%edx)
	/* Step both cursors over these 32 bytes plus the %ecx bytes still
	   outstanding, then dispatch on %ecx.
	   NOTE(review): POP and BRANCH_TO_JMPTBL_ENTRY are macros defined
	   elsewhere; presumably the table entries copy the final %ecx bytes
	   that end at the advanced cursors -- confirm.  */
	lea	32(%edx, %ecx), %edx
	lea	32(%eax, %ecx), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)

	/* NOTE(review): CFI_PUSH is defined elsewhere; presumably it
	   re-declares %edi as pushed for the unwind info, because the code
	   below is entered with %edi still on the stack -- confirm.  */
	CFI_PUSH (%edi)

	.p2align 4
L(sh_9_no_prefetch):
	/* Bias the count by one 32-byte step.  */
	lea	-32(%ecx), %ecx
	/* %eax now addresses the aligned chunk already held in %xmm1.  */
	lea	-9(%eax), %eax
	/* %edi is the running byte offset applied to both cursors.  */
	xor	%edi, %edi

	.p2align 4
L(sh_9_no_prefetch_loop):
	/* Two 32-byte steps per trip; the carry chunk alternates between
	   %xmm1 and %xmm4.  lea, movdqa and palignr leave EFLAGS untouched,
	   so each branch still tests the flags of the preceding sub.  */
	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm4
	palignr	$9, %xmm2, %xmm3
	palignr	$9, %xmm1, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)
	/* Borrow: fewer than 32 bytes remain after this step.  */
	jb	L(sh_9_end_no_prefetch_loop)

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm1
	palignr	$9, %xmm2, %xmm3
	palignr	$9, %xmm4, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)
	jae	L(sh_9_no_prefetch_loop)

L(sh_9_end_no_prefetch_loop):
	/* Remove the bias, then move both cursors past the bytes copied so
	   far plus the %ecx bytes still outstanding; the source cursor also
	   gets its 9-byte skew back.  */
	lea	32(%ecx), %ecx
	add	%ecx, %edi
	add	%edi, %edx
	lea	9(%edi, %eax), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)

	/* NOTE(review): presumably restores the unwind state (%edi pushed)
	   for the code that follows -- confirm.  */
	CFI_PUSH (%edi)
|
|
|
|
|
|
|
|
/* L(shl_10): forward copy for a source cursor (%eax) that lies 10 bytes
   past a 16-byte boundary.  Every source access below is an aligned
   16-byte load (movaps/movdqa fault on unaligned addresses); adjacent
   chunks are stitched together with palignr $10 and written with
   aligned 16-byte stores through %edx.
   Registers as used in this block:
     %eax   source cursor		%edx   destination cursor
     %ecx   remaining byte count (biased while a loop is running)
     %xmm1  aligned source chunk carried over from the previous step
     %edi   scratch / running offset; released with POP (%edi) on exit.  */
	.p2align 4
L(shl_10):
# ifndef USE_AS_MEMMOVE
	/* Prime the carry with the aligned chunk holding the first bytes.  */
	movaps	-10(%eax), %xmm1
# else
	/* NOTE(review): DEST is an offset macro defined elsewhere; the +4
	   presumably skips a register pushed on the stack -- confirm.  */
	movl	DEST+4(%esp), %edi
	movaps	-10(%eax), %xmm1
	/* NOTE(review): %xmm0 is loaded before this label; presumably it
	   holds the leading 16 source bytes, stored only now so that an
	   overlapping destination cannot clobber them -- confirm against
	   the entry code.  */
	movdqu	%xmm0, (%edi)
# endif
	/* Counts below half the data cache size (unsigned compare) take the
	   loop without prefetch instructions.  */
# ifdef DATA_CACHE_SIZE_HALF
	cmp	$DATA_CACHE_SIZE_HALF, %ecx
# else
#  ifdef PIC
	SETUP_PIC_REG(bx)
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
#  else
	cmp	__x86_data_cache_size_half, %ecx
#  endif
# endif
	jb	L(sh_10_no_prefetch)

	/* Bias the count by one 64-byte iteration so the loop can branch on
	   the flags of its own subtraction.  */
	lea	-64(%ecx), %ecx

	.p2align 4
L(Shl10LoopStart):
	/* 64 bytes per iteration; prefetch 0x1c0 bytes ahead on both the
	   source and the destination stream.  */
	prefetcht0 0x1c0(%eax)
	prefetcht0 0x1c0(%edx)
	movaps	6(%eax), %xmm2
	movaps	22(%eax), %xmm3
	movaps	38(%eax), %xmm4
	movaps	54(%eax), %xmm5
	/* Keep an unmerged copy of the top chunk: it is the next carry.  */
	movaps	%xmm5, %xmm7
	/* Merge from the top chunk downwards so that every lower chunk is
	   still unmodified when it is used as the shift-in operand.  */
	palignr	$10, %xmm4, %xmm5
	palignr	$10, %xmm3, %xmm4
	movaps	%xmm5, 48(%edx)
	palignr	$10, %xmm2, %xmm3
	lea	64(%eax), %eax
	palignr	$10, %xmm1, %xmm2
	movaps	%xmm4, 32(%edx)
	movaps	%xmm3, 16(%edx)
	movaps	%xmm7, %xmm1
	movaps	%xmm2, (%edx)
	lea	64(%edx), %edx
	sub	$64, %ecx
	/* Repeat while the subtraction neither borrowed nor reached zero.  */
	ja	L(Shl10LoopStart)

L(Shl10LoopLeave):
	/* Give back 32 of the 64-byte bias.  A result <= 0 (signed) is
	   handed to L(shl_end_0), which is defined outside this block.  */
	add	$32, %ecx
	jle	L(shl_end_0)

	/* More than 32 bytes are left: copy one further 32-byte piece.  */
	movaps	6(%eax), %xmm2
	movaps	22(%eax), %xmm3
	palignr	$10, %xmm2, %xmm3
	palignr	$10, %xmm1, %xmm2

	movaps	%xmm2, (%edx)
	movaps	%xmm3, 16(%edx)
	/* Step both cursors over these 32 bytes plus the %ecx bytes still
	   outstanding, then dispatch on %ecx.
	   NOTE(review): POP and BRANCH_TO_JMPTBL_ENTRY are macros defined
	   elsewhere; presumably the table entries copy the final %ecx bytes
	   that end at the advanced cursors -- confirm.  */
	lea	32(%edx, %ecx), %edx
	lea	32(%eax, %ecx), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)

	/* NOTE(review): CFI_PUSH is defined elsewhere; presumably it
	   re-declares %edi as pushed for the unwind info, because the code
	   below is entered with %edi still on the stack -- confirm.  */
	CFI_PUSH (%edi)

	.p2align 4
L(sh_10_no_prefetch):
	/* Bias the count by one 32-byte step.  */
	lea	-32(%ecx), %ecx
	/* %eax now addresses the aligned chunk already held in %xmm1.  */
	lea	-10(%eax), %eax
	/* %edi is the running byte offset applied to both cursors.  */
	xor	%edi, %edi

	.p2align 4
L(sh_10_no_prefetch_loop):
	/* Two 32-byte steps per trip; the carry chunk alternates between
	   %xmm1 and %xmm4.  lea, movdqa and palignr leave EFLAGS untouched,
	   so each branch still tests the flags of the preceding sub.  */
	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm4
	palignr	$10, %xmm2, %xmm3
	palignr	$10, %xmm1, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)
	/* Borrow: fewer than 32 bytes remain after this step.  */
	jb	L(sh_10_end_no_prefetch_loop)

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm1
	palignr	$10, %xmm2, %xmm3
	palignr	$10, %xmm4, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)
	jae	L(sh_10_no_prefetch_loop)

L(sh_10_end_no_prefetch_loop):
	/* Remove the bias, then move both cursors past the bytes copied so
	   far plus the %ecx bytes still outstanding; the source cursor also
	   gets its 10-byte skew back.  */
	lea	32(%ecx), %ecx
	add	%ecx, %edi
	add	%edi, %edx
	lea	10(%edi, %eax), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)

	/* NOTE(review): presumably restores the unwind state (%edi pushed)
	   for the code that follows -- confirm.  */
	CFI_PUSH (%edi)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
/* L(shl_11): forward copy for a source cursor (%eax) that lies 11 bytes
   past a 16-byte boundary.  Every source access below is an aligned
   16-byte load (movaps/movdqa fault on unaligned addresses); adjacent
   chunks are stitched together with palignr $11 and written with
   aligned 16-byte stores through %edx.
   Registers as used in this block:
     %eax   source cursor		%edx   destination cursor
     %ecx   remaining byte count (biased while a loop is running)
     %xmm1  aligned source chunk carried over from the previous step
     %edi   scratch / running offset; released with POP (%edi) on exit.  */
	.p2align 4
L(shl_11):
# ifndef USE_AS_MEMMOVE
	/* Prime the carry with the aligned chunk holding the first bytes.  */
	movaps	-11(%eax), %xmm1
# else
	/* NOTE(review): DEST is an offset macro defined elsewhere; the +4
	   presumably skips a register pushed on the stack -- confirm.  */
	movl	DEST+4(%esp), %edi
	movaps	-11(%eax), %xmm1
	/* NOTE(review): %xmm0 is loaded before this label; presumably it
	   holds the leading 16 source bytes, stored only now so that an
	   overlapping destination cannot clobber them -- confirm against
	   the entry code.  */
	movdqu	%xmm0, (%edi)
# endif
	/* Counts below half the data cache size (unsigned compare) take the
	   loop without prefetch instructions.  */
# ifdef DATA_CACHE_SIZE_HALF
	cmp	$DATA_CACHE_SIZE_HALF, %ecx
# else
#  ifdef PIC
	SETUP_PIC_REG(bx)
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
#  else
	cmp	__x86_data_cache_size_half, %ecx
#  endif
# endif
	jb	L(sh_11_no_prefetch)

	/* Bias the count by one 64-byte iteration so the loop can branch on
	   the flags of its own subtraction.  */
	lea	-64(%ecx), %ecx

	.p2align 4
L(Shl11LoopStart):
	/* 64 bytes per iteration; prefetch 0x1c0 bytes ahead on both the
	   source and the destination stream.  */
	prefetcht0 0x1c0(%eax)
	prefetcht0 0x1c0(%edx)
	movaps	5(%eax), %xmm2
	movaps	21(%eax), %xmm3
	movaps	37(%eax), %xmm4
	movaps	53(%eax), %xmm5
	/* Keep an unmerged copy of the top chunk: it is the next carry.  */
	movaps	%xmm5, %xmm7
	/* Merge from the top chunk downwards so that every lower chunk is
	   still unmodified when it is used as the shift-in operand.  */
	palignr	$11, %xmm4, %xmm5
	palignr	$11, %xmm3, %xmm4
	movaps	%xmm5, 48(%edx)
	palignr	$11, %xmm2, %xmm3
	lea	64(%eax), %eax
	palignr	$11, %xmm1, %xmm2
	movaps	%xmm4, 32(%edx)
	movaps	%xmm3, 16(%edx)
	movaps	%xmm7, %xmm1
	movaps	%xmm2, (%edx)
	lea	64(%edx), %edx
	sub	$64, %ecx
	/* Repeat while the subtraction neither borrowed nor reached zero.  */
	ja	L(Shl11LoopStart)

L(Shl11LoopLeave):
	/* Give back 32 of the 64-byte bias.  A result <= 0 (signed) is
	   handed to L(shl_end_0), which is defined outside this block.  */
	add	$32, %ecx
	jle	L(shl_end_0)

	/* More than 32 bytes are left: copy one further 32-byte piece.  */
	movaps	5(%eax), %xmm2
	movaps	21(%eax), %xmm3
	palignr	$11, %xmm2, %xmm3
	palignr	$11, %xmm1, %xmm2

	movaps	%xmm2, (%edx)
	movaps	%xmm3, 16(%edx)
	/* Step both cursors over these 32 bytes plus the %ecx bytes still
	   outstanding, then dispatch on %ecx.
	   NOTE(review): POP and BRANCH_TO_JMPTBL_ENTRY are macros defined
	   elsewhere; presumably the table entries copy the final %ecx bytes
	   that end at the advanced cursors -- confirm.  */
	lea	32(%edx, %ecx), %edx
	lea	32(%eax, %ecx), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)

	/* NOTE(review): CFI_PUSH is defined elsewhere; presumably it
	   re-declares %edi as pushed for the unwind info, because the code
	   below is entered with %edi still on the stack -- confirm.  */
	CFI_PUSH (%edi)

	.p2align 4
L(sh_11_no_prefetch):
	/* Bias the count by one 32-byte step.  */
	lea	-32(%ecx), %ecx
	/* %eax now addresses the aligned chunk already held in %xmm1.  */
	lea	-11(%eax), %eax
	/* %edi is the running byte offset applied to both cursors.  */
	xor	%edi, %edi

	.p2align 4
L(sh_11_no_prefetch_loop):
	/* Two 32-byte steps per trip; the carry chunk alternates between
	   %xmm1 and %xmm4.  lea, movdqa and palignr leave EFLAGS untouched,
	   so each branch still tests the flags of the preceding sub.  */
	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm4
	palignr	$11, %xmm2, %xmm3
	palignr	$11, %xmm1, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)
	/* Borrow: fewer than 32 bytes remain after this step.  */
	jb	L(sh_11_end_no_prefetch_loop)

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm1
	palignr	$11, %xmm2, %xmm3
	palignr	$11, %xmm4, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)
	jae	L(sh_11_no_prefetch_loop)

L(sh_11_end_no_prefetch_loop):
	/* Remove the bias, then move both cursors past the bytes copied so
	   far plus the %ecx bytes still outstanding; the source cursor also
	   gets its 11-byte skew back.  */
	lea	32(%ecx), %ecx
	add	%ecx, %edi
	add	%edi, %edx
	lea	11(%edi, %eax), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)

	/* NOTE(review): presumably restores the unwind state (%edi pushed)
	   for the code that follows -- confirm.  */
	CFI_PUSH (%edi)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
/* L(shl_12): forward copy for a source cursor (%eax) that lies 12 bytes
   past a 16-byte boundary.  Every source access below is an aligned
   16-byte load (movaps/movdqa fault on unaligned addresses); adjacent
   chunks are stitched together with palignr $12 and written with
   aligned 16-byte stores through %edx.
   Registers as used in this block:
     %eax   source cursor		%edx   destination cursor
     %ecx   remaining byte count (biased while a loop is running)
     %xmm1  aligned source chunk carried over from the previous step
     %edi   scratch / running offset; released with POP (%edi) on exit.  */
	.p2align 4
L(shl_12):
# ifndef USE_AS_MEMMOVE
	/* Prime the carry with the aligned chunk holding the first bytes.  */
	movaps	-12(%eax), %xmm1
# else
	/* NOTE(review): DEST is an offset macro defined elsewhere; the +4
	   presumably skips a register pushed on the stack -- confirm.  */
	movl	DEST+4(%esp), %edi
	movaps	-12(%eax), %xmm1
	/* NOTE(review): %xmm0 is loaded before this label; presumably it
	   holds the leading 16 source bytes, stored only now so that an
	   overlapping destination cannot clobber them -- confirm against
	   the entry code.  */
	movdqu	%xmm0, (%edi)
# endif
	/* Counts below half the data cache size (unsigned compare) take the
	   loop without prefetch instructions.  */
# ifdef DATA_CACHE_SIZE_HALF
	cmp	$DATA_CACHE_SIZE_HALF, %ecx
# else
#  ifdef PIC
	SETUP_PIC_REG(bx)
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
#  else
	cmp	__x86_data_cache_size_half, %ecx
#  endif
# endif
	jb	L(sh_12_no_prefetch)

	/* Bias the count by one 64-byte iteration so the loop can branch on
	   the flags of its own subtraction.  */
	lea	-64(%ecx), %ecx

	.p2align 4
L(Shl12LoopStart):
	/* 64 bytes per iteration; prefetch 0x1c0 bytes ahead on both the
	   source and the destination stream.  */
	prefetcht0 0x1c0(%eax)
	prefetcht0 0x1c0(%edx)
	movaps	4(%eax), %xmm2
	movaps	20(%eax), %xmm3
	movaps	36(%eax), %xmm4
	movaps	52(%eax), %xmm5
	/* Keep an unmerged copy of the top chunk: it is the next carry.  */
	movaps	%xmm5, %xmm7
	/* Merge from the top chunk downwards so that every lower chunk is
	   still unmodified when it is used as the shift-in operand.  */
	palignr	$12, %xmm4, %xmm5
	palignr	$12, %xmm3, %xmm4
	movaps	%xmm5, 48(%edx)
	palignr	$12, %xmm2, %xmm3
	lea	64(%eax), %eax
	palignr	$12, %xmm1, %xmm2
	movaps	%xmm4, 32(%edx)
	movaps	%xmm3, 16(%edx)
	movaps	%xmm7, %xmm1
	movaps	%xmm2, (%edx)
	lea	64(%edx), %edx
	sub	$64, %ecx
	/* Repeat while the subtraction neither borrowed nor reached zero.  */
	ja	L(Shl12LoopStart)

L(Shl12LoopLeave):
	/* Give back 32 of the 64-byte bias.  A result <= 0 (signed) is
	   handed to L(shl_end_0), which is defined outside this block.  */
	add	$32, %ecx
	jle	L(shl_end_0)

	/* More than 32 bytes are left: copy one further 32-byte piece.  */
	movaps	4(%eax), %xmm2
	movaps	20(%eax), %xmm3
	palignr	$12, %xmm2, %xmm3
	palignr	$12, %xmm1, %xmm2

	movaps	%xmm2, (%edx)
	movaps	%xmm3, 16(%edx)
	/* Step both cursors over these 32 bytes plus the %ecx bytes still
	   outstanding, then dispatch on %ecx.
	   NOTE(review): POP and BRANCH_TO_JMPTBL_ENTRY are macros defined
	   elsewhere; presumably the table entries copy the final %ecx bytes
	   that end at the advanced cursors -- confirm.  */
	lea	32(%edx, %ecx), %edx
	lea	32(%eax, %ecx), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)

	/* NOTE(review): CFI_PUSH is defined elsewhere; presumably it
	   re-declares %edi as pushed for the unwind info, because the code
	   below is entered with %edi still on the stack -- confirm.  */
	CFI_PUSH (%edi)

	.p2align 4
L(sh_12_no_prefetch):
	/* Bias the count by one 32-byte step.  */
	lea	-32(%ecx), %ecx
	/* %eax now addresses the aligned chunk already held in %xmm1.  */
	lea	-12(%eax), %eax
	/* %edi is the running byte offset applied to both cursors.  */
	xor	%edi, %edi

	.p2align 4
L(sh_12_no_prefetch_loop):
	/* Two 32-byte steps per trip; the carry chunk alternates between
	   %xmm1 and %xmm4.  lea, movdqa and palignr leave EFLAGS untouched,
	   so each branch still tests the flags of the preceding sub.  */
	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm4
	palignr	$12, %xmm2, %xmm3
	palignr	$12, %xmm1, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)
	/* Borrow: fewer than 32 bytes remain after this step.  */
	jb	L(sh_12_end_no_prefetch_loop)

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm1
	palignr	$12, %xmm2, %xmm3
	palignr	$12, %xmm4, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)
	jae	L(sh_12_no_prefetch_loop)

L(sh_12_end_no_prefetch_loop):
	/* Remove the bias, then move both cursors past the bytes copied so
	   far plus the %ecx bytes still outstanding; the source cursor also
	   gets its 12-byte skew back.  */
	lea	32(%ecx), %ecx
	add	%ecx, %edi
	add	%edi, %edx
	lea	12(%edi, %eax), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)

	/* NOTE(review): presumably restores the unwind state (%edi pushed)
	   for the code that follows -- confirm.  */
	CFI_PUSH (%edi)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
/* L(shl_13): forward copy for a source cursor (%eax) that lies 13 bytes
   past a 16-byte boundary.  Every source access below is an aligned
   16-byte load (movaps/movdqa fault on unaligned addresses); adjacent
   chunks are stitched together with palignr $13 and written with
   aligned 16-byte stores through %edx.
   Registers as used in this block:
     %eax   source cursor		%edx   destination cursor
     %ecx   remaining byte count (biased while a loop is running)
     %xmm1  aligned source chunk carried over from the previous step
     %edi   scratch / running offset; released with POP (%edi) on exit.  */
	.p2align 4
L(shl_13):
# ifndef USE_AS_MEMMOVE
	/* Prime the carry with the aligned chunk holding the first bytes.  */
	movaps	-13(%eax), %xmm1
# else
	/* NOTE(review): DEST is an offset macro defined elsewhere; the +4
	   presumably skips a register pushed on the stack -- confirm.  */
	movl	DEST+4(%esp), %edi
	movaps	-13(%eax), %xmm1
	/* NOTE(review): %xmm0 is loaded before this label; presumably it
	   holds the leading 16 source bytes, stored only now so that an
	   overlapping destination cannot clobber them -- confirm against
	   the entry code.  */
	movdqu	%xmm0, (%edi)
# endif
	/* Counts below half the data cache size (unsigned compare) take the
	   loop without prefetch instructions.  */
# ifdef DATA_CACHE_SIZE_HALF
	cmp	$DATA_CACHE_SIZE_HALF, %ecx
# else
#  ifdef PIC
	SETUP_PIC_REG(bx)
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
#  else
	cmp	__x86_data_cache_size_half, %ecx
#  endif
# endif
	jb	L(sh_13_no_prefetch)

	/* Bias the count by one 64-byte iteration so the loop can branch on
	   the flags of its own subtraction.  */
	lea	-64(%ecx), %ecx

	.p2align 4
L(Shl13LoopStart):
	/* 64 bytes per iteration; prefetch 0x1c0 bytes ahead on both the
	   source and the destination stream.  */
	prefetcht0 0x1c0(%eax)
	prefetcht0 0x1c0(%edx)
	movaps	3(%eax), %xmm2
	movaps	19(%eax), %xmm3
	movaps	35(%eax), %xmm4
	movaps	51(%eax), %xmm5
	/* Keep an unmerged copy of the top chunk: it is the next carry.  */
	movaps	%xmm5, %xmm7
	/* Merge from the top chunk downwards so that every lower chunk is
	   still unmodified when it is used as the shift-in operand.  */
	palignr	$13, %xmm4, %xmm5
	palignr	$13, %xmm3, %xmm4
	movaps	%xmm5, 48(%edx)
	palignr	$13, %xmm2, %xmm3
	lea	64(%eax), %eax
	palignr	$13, %xmm1, %xmm2
	movaps	%xmm4, 32(%edx)
	movaps	%xmm3, 16(%edx)
	movaps	%xmm7, %xmm1
	movaps	%xmm2, (%edx)
	lea	64(%edx), %edx
	sub	$64, %ecx
	/* Repeat while the subtraction neither borrowed nor reached zero.  */
	ja	L(Shl13LoopStart)

L(Shl13LoopLeave):
	/* Give back 32 of the 64-byte bias.  A result <= 0 (signed) is
	   handed to L(shl_end_0), which is defined outside this block.  */
	add	$32, %ecx
	jle	L(shl_end_0)

	/* More than 32 bytes are left: copy one further 32-byte piece.  */
	movaps	3(%eax), %xmm2
	movaps	19(%eax), %xmm3
	palignr	$13, %xmm2, %xmm3
	palignr	$13, %xmm1, %xmm2

	movaps	%xmm2, (%edx)
	movaps	%xmm3, 16(%edx)
	/* Step both cursors over these 32 bytes plus the %ecx bytes still
	   outstanding, then dispatch on %ecx.
	   NOTE(review): POP and BRANCH_TO_JMPTBL_ENTRY are macros defined
	   elsewhere; presumably the table entries copy the final %ecx bytes
	   that end at the advanced cursors -- confirm.  */
	lea	32(%edx, %ecx), %edx
	lea	32(%eax, %ecx), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)

	/* NOTE(review): CFI_PUSH is defined elsewhere; presumably it
	   re-declares %edi as pushed for the unwind info, because the code
	   below is entered with %edi still on the stack -- confirm.  */
	CFI_PUSH (%edi)

	.p2align 4
L(sh_13_no_prefetch):
	/* Bias the count by one 32-byte step.  */
	lea	-32(%ecx), %ecx
	/* %eax now addresses the aligned chunk already held in %xmm1.  */
	lea	-13(%eax), %eax
	/* %edi is the running byte offset applied to both cursors.  */
	xor	%edi, %edi

	.p2align 4
L(sh_13_no_prefetch_loop):
	/* Two 32-byte steps per trip; the carry chunk alternates between
	   %xmm1 and %xmm4.  lea, movdqa and palignr leave EFLAGS untouched,
	   so each branch still tests the flags of the preceding sub.  */
	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm4
	palignr	$13, %xmm2, %xmm3
	palignr	$13, %xmm1, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)
	/* Borrow: fewer than 32 bytes remain after this step.  */
	jb	L(sh_13_end_no_prefetch_loop)

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm1
	palignr	$13, %xmm2, %xmm3
	palignr	$13, %xmm4, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)
	jae	L(sh_13_no_prefetch_loop)

L(sh_13_end_no_prefetch_loop):
	/* Remove the bias, then move both cursors past the bytes copied so
	   far plus the %ecx bytes still outstanding; the source cursor also
	   gets its 13-byte skew back.  */
	lea	32(%ecx), %ecx
	add	%ecx, %edi
	add	%edi, %edx
	lea	13(%edi, %eax), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)

	/* NOTE(review): presumably restores the unwind state (%edi pushed)
	   for the code that follows -- confirm.  */
	CFI_PUSH (%edi)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
/* L(shl_14): forward copy for a source cursor (%eax) that lies 14 bytes
   past a 16-byte boundary.  Every source access below is an aligned
   16-byte load (movaps/movdqa fault on unaligned addresses); adjacent
   chunks are stitched together with palignr $14 and written with
   aligned 16-byte stores through %edx.
   Registers as used in this block:
     %eax   source cursor		%edx   destination cursor
     %ecx   remaining byte count (biased while a loop is running)
     %xmm1  aligned source chunk carried over from the previous step
     %edi   scratch / running offset; released with POP (%edi) on exit.  */
	.p2align 4
L(shl_14):
# ifndef USE_AS_MEMMOVE
	/* Prime the carry with the aligned chunk holding the first bytes.  */
	movaps	-14(%eax), %xmm1
# else
	/* NOTE(review): DEST is an offset macro defined elsewhere; the +4
	   presumably skips a register pushed on the stack -- confirm.  */
	movl	DEST+4(%esp), %edi
	movaps	-14(%eax), %xmm1
	/* NOTE(review): %xmm0 is loaded before this label; presumably it
	   holds the leading 16 source bytes, stored only now so that an
	   overlapping destination cannot clobber them -- confirm against
	   the entry code.  */
	movdqu	%xmm0, (%edi)
# endif
	/* Counts below half the data cache size (unsigned compare) take the
	   loop without prefetch instructions.  */
# ifdef DATA_CACHE_SIZE_HALF
	cmp	$DATA_CACHE_SIZE_HALF, %ecx
# else
#  ifdef PIC
	SETUP_PIC_REG(bx)
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
#  else
	cmp	__x86_data_cache_size_half, %ecx
#  endif
# endif
	jb	L(sh_14_no_prefetch)

	/* Bias the count by one 64-byte iteration so the loop can branch on
	   the flags of its own subtraction.  */
	lea	-64(%ecx), %ecx

	.p2align 4
L(Shl14LoopStart):
	/* 64 bytes per iteration; prefetch 0x1c0 bytes ahead on both the
	   source and the destination stream.  */
	prefetcht0 0x1c0(%eax)
	prefetcht0 0x1c0(%edx)
	movaps	2(%eax), %xmm2
	movaps	18(%eax), %xmm3
	movaps	34(%eax), %xmm4
	movaps	50(%eax), %xmm5
	/* Keep an unmerged copy of the top chunk: it is the next carry.  */
	movaps	%xmm5, %xmm7
	/* Merge from the top chunk downwards so that every lower chunk is
	   still unmodified when it is used as the shift-in operand.  */
	palignr	$14, %xmm4, %xmm5
	palignr	$14, %xmm3, %xmm4
	movaps	%xmm5, 48(%edx)
	palignr	$14, %xmm2, %xmm3
	lea	64(%eax), %eax
	palignr	$14, %xmm1, %xmm2
	movaps	%xmm4, 32(%edx)
	movaps	%xmm3, 16(%edx)
	movaps	%xmm7, %xmm1
	movaps	%xmm2, (%edx)
	lea	64(%edx), %edx
	sub	$64, %ecx
	/* Repeat while the subtraction neither borrowed nor reached zero.  */
	ja	L(Shl14LoopStart)

L(Shl14LoopLeave):
	/* Give back 32 of the 64-byte bias.  A result <= 0 (signed) is
	   handed to L(shl_end_0), which is defined outside this block.  */
	add	$32, %ecx
	jle	L(shl_end_0)

	/* More than 32 bytes are left: copy one further 32-byte piece.  */
	movaps	2(%eax), %xmm2
	movaps	18(%eax), %xmm3
	palignr	$14, %xmm2, %xmm3
	palignr	$14, %xmm1, %xmm2

	movaps	%xmm2, (%edx)
	movaps	%xmm3, 16(%edx)
	/* Step both cursors over these 32 bytes plus the %ecx bytes still
	   outstanding, then dispatch on %ecx.
	   NOTE(review): POP and BRANCH_TO_JMPTBL_ENTRY are macros defined
	   elsewhere; presumably the table entries copy the final %ecx bytes
	   that end at the advanced cursors -- confirm.  */
	lea	32(%edx, %ecx), %edx
	lea	32(%eax, %ecx), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)

	/* NOTE(review): CFI_PUSH is defined elsewhere; presumably it
	   re-declares %edi as pushed for the unwind info, because the code
	   below is entered with %edi still on the stack -- confirm.  */
	CFI_PUSH (%edi)

	.p2align 4
L(sh_14_no_prefetch):
	/* Bias the count by one 32-byte step.  */
	lea	-32(%ecx), %ecx
	/* %eax now addresses the aligned chunk already held in %xmm1.  */
	lea	-14(%eax), %eax
	/* %edi is the running byte offset applied to both cursors.  */
	xor	%edi, %edi

	.p2align 4
L(sh_14_no_prefetch_loop):
	/* Two 32-byte steps per trip; the carry chunk alternates between
	   %xmm1 and %xmm4.  lea, movdqa and palignr leave EFLAGS untouched,
	   so each branch still tests the flags of the preceding sub.  */
	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm4
	palignr	$14, %xmm2, %xmm3
	palignr	$14, %xmm1, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)
	/* Borrow: fewer than 32 bytes remain after this step.  */
	jb	L(sh_14_end_no_prefetch_loop)

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm1
	palignr	$14, %xmm2, %xmm3
	palignr	$14, %xmm4, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)
	jae	L(sh_14_no_prefetch_loop)

L(sh_14_end_no_prefetch_loop):
	/* Remove the bias, then move both cursors past the bytes copied so
	   far plus the %ecx bytes still outstanding; the source cursor also
	   gets its 14-byte skew back.  */
	lea	32(%ecx), %ecx
	add	%ecx, %edi
	add	%edi, %edx
	lea	14(%edi, %eax), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)

	/* NOTE(review): presumably restores the unwind state (%edi pushed)
	   for the code that follows -- confirm.  */
	CFI_PUSH (%edi)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2010-01-12 19:22:03 +00:00
|
|
|
L(shl_15):
|
2012-03-30 20:45:27 +00:00
|
|
|
# ifndef USE_AS_MEMMOVE
|
|
|
|
movaps -15(%eax), %xmm1
|
|
|
|
# else
|
|
|
|
movl DEST+4(%esp), %edi
|
|
|
|
movaps -15(%eax), %xmm1
|
|
|
|
movdqu %xmm0, (%edi)
|
|
|
|
# endif
|
|
|
|
# ifdef DATA_CACHE_SIZE_HALF
|
|
|
|
cmp $DATA_CACHE_SIZE_HALF, %ecx
|
|
|
|
# else
|
2017-08-02 17:25:11 +00:00
|
|
|
# ifdef PIC
|
2012-03-30 20:45:27 +00:00
|
|
|
SETUP_PIC_REG(bx)
|
|
|
|
add $_GLOBAL_OFFSET_TABLE_, %ebx
|
|
|
|
cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
|
|
|
|
# else
|
|
|
|
cmp __x86_data_cache_size_half, %ecx
|
|
|
|
# endif
|
|
|
|
# endif
|
|
|
|
jb L(sh_15_no_prefetch)
|
|
|
|
|
|
|
|
lea -64(%ecx), %ecx
|
|
|
|
|
|
|
|
.p2align 4
|
|
|
|
/* Main loop of the shift-by-15 path: 64 bytes per iteration.
   Four aligned 16-byte blocks are loaded from 1/17/33/49(%eax); each
   palignr $15 concatenates a block with its predecessor and keeps the
   16 bytes starting at byte 15 of the lower block, which produces the
   realigned data for the aligned stores to (%edx).  %xmm1 carries the
   last loaded block from one iteration to the next.  */
L(Shl15LoopStart):
|
|
|
|
prefetcht0 0x1c0(%eax)
|
|
|
|
prefetcht0 0x1c0(%edx)
|
|
|
|
movaps 1(%eax), %xmm2
|
|
|
|
movaps 17(%eax), %xmm3
|
|
|
|
movaps 33(%eax), %xmm4
|
|
|
|
movaps 49(%eax), %xmm5
|
|
|
|
/* Keep an unshifted copy of the last block; it becomes %xmm1 below.  */
movaps %xmm5, %xmm7
|
|
|
|
palignr $15, %xmm4, %xmm5
|
|
|
|
palignr $15, %xmm3, %xmm4
|
|
|
|
movaps %xmm5, 48(%edx)
|
|
|
|
palignr $15, %xmm2, %xmm3
|
|
|
|
lea 64(%eax), %eax
|
|
|
|
palignr $15, %xmm1, %xmm2
|
|
|
|
movaps %xmm4, 32(%edx)
|
|
|
|
movaps %xmm3, 16(%edx)
|
|
|
|
movaps %xmm7, %xmm1
|
|
|
|
movaps %xmm2, (%edx)
|
|
|
|
lea 64(%edx), %edx
|
|
|
|
/* %ecx was biased by -64 before the loop; "ja" repeats only while the
   subtraction neither borrows nor reaches zero.  */
sub $64, %ecx
|
|
|
|
ja L(Shl15LoopStart)
|
|
|
|
|
|
|
|
/* Tail of the prefetching shift-by-15 loop.  After "add $32" %ecx
   holds (bytes remaining - 32), given the -64 bias applied before the
   loop.  A non-positive value (signed) means at most 32 bytes remain
   and L(shl_end_0) finishes; otherwise one more 32-byte realigned step
   is copied here.  */
L(Shl15LoopLeave):
|
|
|
|
add $32, %ecx
|
|
|
|
jle L(shl_end_0)
|
|
|
|
|
|
|
|
|
movaps 1(%eax), %xmm2
|
|
|
|
movaps 17(%eax), %xmm3
|
|
|
|
palignr $15, %xmm2, %xmm3
|
|
|
|
/* %xmm1 still carries the last block loaded by the main loop.  */
palignr $15, %xmm1, %xmm2
|
|
|
|
|
|
|
|
|
movaps %xmm2, (%edx)
|
|
|
|
movaps %xmm3, 16(%edx)
|
|
|
|
/* Advance both pointers by the 32 bytes just copied plus the residual
   count in %ecx, as the jump-table tails address memory with negative
   offsets from %eax/%edx.  */
lea 32(%edx, %ecx), %edx
|
|
|
|
lea 32(%eax, %ecx), %eax
|
|
|
|
/* POP is a macro defined elsewhere; presumably restores %edi.  */
POP (%edi)
|
|
|
|
/* Dispatch on the residual count in %ecx (table entries are 4 bytes).  */
BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
|
|
|
|
|
|
|
|
CFI_PUSH (%edi)
|
|
|
|
|
|
|
|
.p2align 4
|
|
|
|
/* Setup for the shift-by-15 loop without prefetch: bias %ecx by -32 so
   the loop can branch on the flags of its own "sub $32", rebase %eax
   by -15 (the address L(shl_15) loaded from with movaps), and clear
   %edi, which the loop uses as a byte index into both buffers.  */
L(sh_15_no_prefetch):
|
|
|
|
lea -32(%ecx), %ecx
|
2010-01-12 19:22:03 +00:00
|
|
|
lea -15(%eax), %eax
|
|
|
|
xor %edi, %edi
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
|
|
|
/* Shift-by-15 copy loop without prefetch, unrolled twice (32 bytes per
   half).  %eax/%edx are fixed bases and %edi is the running index.
   The carry block alternates between %xmm1 (consumed by the first
   half, produced by the second) and %xmm4 (the reverse).
   movdqa, palignr and lea do not modify EFLAGS, so each jb/jae below
   tests the borrow of the preceding "sub $32, %ecx".  */
L(sh_15_no_prefetch_loop):
|
2010-01-12 19:22:03 +00:00
|
|
|
movdqa 16(%eax, %edi), %xmm2
|
|
|
|
sub $32, %ecx
|
|
|
|
movdqa 32(%eax, %edi), %xmm3
|
|
|
|
/* Save the unshifted last block as the carry for the second half.  */
movdqa %xmm3, %xmm4
|
|
|
|
palignr $15, %xmm2, %xmm3
|
|
|
|
palignr $15, %xmm1, %xmm2
|
|
|
|
lea 32(%edi), %edi
|
|
|
|
/* %edi was already advanced, hence the -32/-16 displacements.  */
movdqa %xmm2, -32(%edx, %edi)
|
|
|
|
movdqa %xmm3, -16(%edx, %edi)
|
2012-03-30 20:45:27 +00:00
|
|
|
/* Borrow from the sub above: leave the loop.  */
jb L(sh_15_end_no_prefetch_loop)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
|
|
|
|
movdqa 16(%eax, %edi), %xmm2
|
|
|
|
sub $32, %ecx
|
|
|
|
movdqa 32(%eax, %edi), %xmm3
|
|
|
|
/* Save the unshifted last block as the carry for the first half.  */
movdqa %xmm3, %xmm1
|
|
|
|
palignr $15, %xmm2, %xmm3
|
|
|
|
palignr $15, %xmm4, %xmm2
|
|
|
|
lea 32(%edi), %edi
|
|
|
|
movdqa %xmm2, -32(%edx, %edi)
|
|
|
|
movdqa %xmm3, -16(%edx, %edi)
|
2012-03-30 20:45:27 +00:00
|
|
|
/* No borrow from the sub above: go around again.  */
jae L(sh_15_no_prefetch_loop)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
/* Exit of the shift-by-15 loop without prefetch.  Removes the -32 bias
   from %ecx, folds it into the index %edi and rebuilds plain pointers:
   %edx += %edi, and %eax = %eax + %edi + 15, which also undoes the
   "lea -15(%eax), %eax" done in L(sh_15_no_prefetch).  */
L(sh_15_end_no_prefetch_loop):
|
2010-01-12 19:22:03 +00:00
|
|
|
lea 32(%ecx), %ecx
|
|
|
|
add %ecx, %edi
|
|
|
|
add %edi, %edx
|
|
|
|
lea 15(%edi, %eax), %eax
|
2012-03-30 20:45:27 +00:00
|
|
|
/* POP is a macro defined elsewhere; presumably restores %edi.  */
POP (%edi)
|
|
|
|
/* Dispatch on the residual count in %ecx (table entries are 4 bytes).  */
BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
|
|
|
|
|
|
|
|
CFI_PUSH (%edi)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
|
|
|
/* Shared exit reached from L(Shl15LoopLeave) via jle (the other jumps
   to this label, if any, are outside this view).  Adds 32 back to
   %ecx, advances both pointers by the resulting count and dispatches
   on it.  */
L(shl_end_0):
|
|
|
|
lea 32(%ecx), %ecx
|
|
|
|
lea (%edx, %ecx), %edx
|
|
|
|
lea (%eax, %ecx), %eax
|
|
|
|
/* POP is a macro defined elsewhere; presumably restores %edi.  */
POP (%edi)
|
|
|
|
BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2010-01-12 19:22:03 +00:00
|
|
|
/* Forward tail for residual counts 44, 36, 28, 20, 12 and 4.
   Each label copies one 8-byte chunk at a negative offset from
   %eax (source) to the same offset from %edx (destination) and falls
   through to the next label; the 4-byte entry finishes with a single
   32-bit move.  The dispatch sites in this file add the residual count
   to both pointers before jumping here.  */
L(fwd_write_44bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq -44(%eax), %xmm0
|
|
|
|
movq %xmm0, -44(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(fwd_write_36bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq -36(%eax), %xmm0
|
|
|
|
movq %xmm0, -36(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(fwd_write_28bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq -28(%eax), %xmm0
|
|
|
|
movq %xmm0, -28(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(fwd_write_20bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq -20(%eax), %xmm0
|
|
|
|
movq %xmm0, -20(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(fwd_write_12bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq -12(%eax), %xmm0
|
|
|
|
movq %xmm0, -12(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(fwd_write_4bytes):
|
|
|
|
movl -4(%eax), %ecx
|
|
|
|
movl %ecx, -4(%edx)
|
2022-02-10 14:20:54 +00:00
|
|
|
/* Return value: the advanced destination pointer %edx for mempcpy,
   otherwise the DEST argument reloaded from the stack.  */
# ifdef USE_AS_MEMPCPY
|
2011-10-23 18:28:26 +00:00
|
|
|
movl %edx, %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# else
|
2011-10-23 18:28:26 +00:00
|
|
|
movl DEST(%esp), %eax
|
|
|
|
# endif
|
|
|
|
/* RETURN is a macro defined elsewhere in the file.  */
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2011-10-23 18:28:26 +00:00
|
|
|
/* Forward tail for residual counts 40, 32, 24, 16, 8 and 0.
   Each label copies one 8-byte chunk at a negative offset from
   %eax (source) to the same offset from %edx (destination) and falls
   through; L(fwd_write_0bytes) only sets up the return value.  */
L(fwd_write_40bytes):
|
|
|
|
movq -40(%eax), %xmm0
|
|
|
|
movq %xmm0, -40(%edx)
|
|
|
|
L(fwd_write_32bytes):
|
|
|
|
movq -32(%eax), %xmm0
|
|
|
|
movq %xmm0, -32(%edx)
|
|
|
|
L(fwd_write_24bytes):
|
|
|
|
movq -24(%eax), %xmm0
|
|
|
|
movq %xmm0, -24(%edx)
|
|
|
|
L(fwd_write_16bytes):
|
|
|
|
movq -16(%eax), %xmm0
|
|
|
|
movq %xmm0, -16(%edx)
|
|
|
|
L(fwd_write_8bytes):
|
|
|
|
movq -8(%eax), %xmm0
|
|
|
|
movq %xmm0, -8(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(fwd_write_0bytes):
|
2022-02-10 14:20:54 +00:00
|
|
|
/* Return value: the advanced destination pointer %edx for mempcpy,
   otherwise the DEST argument reloaded from the stack.  */
# ifdef USE_AS_MEMPCPY
|
2010-01-12 19:22:03 +00:00
|
|
|
movl %edx, %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# else
|
2010-01-12 19:22:03 +00:00
|
|
|
movl DEST(%esp), %eax
|
|
|
|
# endif
|
|
|
|
/* RETURN is a macro defined elsewhere in the file.  */
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2010-01-12 19:22:03 +00:00
|
|
|
/* Forward tail for a residual count of 5: two overlapping 32-bit moves
   cover offsets -5..-2 and -4..-1.  Both loads are issued before
   either store, and %eax is reused as a data register because the
   source pointer is not needed afterwards.  */
L(fwd_write_5bytes):
|
|
|
|
movl -5(%eax), %ecx
|
|
|
|
movl -4(%eax), %eax
|
|
|
|
movl %ecx, -5(%edx)
|
|
|
|
movl %eax, -4(%edx)
|
2022-02-10 14:20:54 +00:00
|
|
|
/* Return value: the advanced destination pointer %edx for mempcpy,
   otherwise the DEST argument reloaded from the stack.  */
# ifdef USE_AS_MEMPCPY
|
2010-01-12 19:22:03 +00:00
|
|
|
movl %edx, %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# else
|
2010-01-12 19:22:03 +00:00
|
|
|
movl DEST(%esp), %eax
|
|
|
|
# endif
|
|
|
|
/* RETURN is a macro defined elsewhere in the file.  */
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2010-01-12 19:22:03 +00:00
|
|
|
L(fwd_write_45bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq -45(%eax), %xmm0
|
|
|
|
movq %xmm0, -45(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(fwd_write_37bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq -37(%eax), %xmm0
|
|
|
|
movq %xmm0, -37(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(fwd_write_29bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq -29(%eax), %xmm0
|
|
|
|
movq %xmm0, -29(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(fwd_write_21bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq -21(%eax), %xmm0
|
|
|
|
movq %xmm0, -21(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(fwd_write_13bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq -13(%eax), %xmm0
|
|
|
|
movq %xmm0, -13(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
movl -5(%eax), %ecx
|
|
|
|
movl %ecx, -5(%edx)
|
2011-10-23 18:28:26 +00:00
|
|
|
movzbl -1(%eax), %ecx
|
|
|
|
movb %cl, -1(%edx)
|
2022-02-10 14:20:54 +00:00
|
|
|
# ifdef USE_AS_MEMPCPY
|
2011-10-23 18:28:26 +00:00
|
|
|
movl %edx, %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# else
|
2011-10-23 18:28:26 +00:00
|
|
|
movl DEST(%esp), %eax
|
|
|
|
# endif
|
|
|
|
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2011-10-23 18:28:26 +00:00
|
|
|
L(fwd_write_41bytes):
|
|
|
|
movq -41(%eax), %xmm0
|
|
|
|
movq %xmm0, -41(%edx)
|
|
|
|
L(fwd_write_33bytes):
|
|
|
|
movq -33(%eax), %xmm0
|
|
|
|
movq %xmm0, -33(%edx)
|
|
|
|
L(fwd_write_25bytes):
|
|
|
|
movq -25(%eax), %xmm0
|
|
|
|
movq %xmm0, -25(%edx)
|
|
|
|
L(fwd_write_17bytes):
|
|
|
|
movq -17(%eax), %xmm0
|
|
|
|
movq %xmm0, -17(%edx)
|
|
|
|
L(fwd_write_9bytes):
|
|
|
|
movq -9(%eax), %xmm0
|
|
|
|
movq %xmm0, -9(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(fwd_write_1bytes):
|
|
|
|
movzbl -1(%eax), %ecx
|
|
|
|
movb %cl, -1(%edx)
|
2022-02-10 14:20:54 +00:00
|
|
|
# ifdef USE_AS_MEMPCPY
|
2010-01-12 19:22:03 +00:00
|
|
|
movl %edx, %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# else
|
2010-01-12 19:22:03 +00:00
|
|
|
movl DEST(%esp), %eax
|
|
|
|
# endif
|
|
|
|
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2010-01-12 19:22:03 +00:00
|
|
|
L(fwd_write_46bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq -46(%eax), %xmm0
|
|
|
|
movq %xmm0, -46(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(fwd_write_38bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq -38(%eax), %xmm0
|
|
|
|
movq %xmm0, -38(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(fwd_write_30bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq -30(%eax), %xmm0
|
|
|
|
movq %xmm0, -30(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(fwd_write_22bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq -22(%eax), %xmm0
|
|
|
|
movq %xmm0, -22(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(fwd_write_14bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq -14(%eax), %xmm0
|
|
|
|
movq %xmm0, -14(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(fwd_write_6bytes):
|
|
|
|
movl -6(%eax), %ecx
|
|
|
|
movl %ecx, -6(%edx)
|
2011-10-23 18:28:26 +00:00
|
|
|
movzwl -2(%eax), %ecx
|
|
|
|
movw %cx, -2(%edx)
|
2022-02-10 14:20:54 +00:00
|
|
|
# ifdef USE_AS_MEMPCPY
|
2011-10-23 18:28:26 +00:00
|
|
|
movl %edx, %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# else
|
2011-10-23 18:28:26 +00:00
|
|
|
movl DEST(%esp), %eax
|
|
|
|
# endif
|
|
|
|
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2011-10-23 18:28:26 +00:00
|
|
|
L(fwd_write_42bytes):
|
|
|
|
movq -42(%eax), %xmm0
|
|
|
|
movq %xmm0, -42(%edx)
|
|
|
|
L(fwd_write_34bytes):
|
|
|
|
movq -34(%eax), %xmm0
|
|
|
|
movq %xmm0, -34(%edx)
|
|
|
|
L(fwd_write_26bytes):
|
|
|
|
movq -26(%eax), %xmm0
|
|
|
|
movq %xmm0, -26(%edx)
|
|
|
|
L(fwd_write_18bytes):
|
|
|
|
movq -18(%eax), %xmm0
|
|
|
|
movq %xmm0, -18(%edx)
|
|
|
|
L(fwd_write_10bytes):
|
|
|
|
movq -10(%eax), %xmm0
|
|
|
|
movq %xmm0, -10(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(fwd_write_2bytes):
|
|
|
|
movzwl -2(%eax), %ecx
|
|
|
|
movw %cx, -2(%edx)
|
2022-02-10 14:20:54 +00:00
|
|
|
# ifdef USE_AS_MEMPCPY
|
2010-01-12 19:22:03 +00:00
|
|
|
movl %edx, %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# else
|
2010-01-12 19:22:03 +00:00
|
|
|
movl DEST(%esp), %eax
|
|
|
|
# endif
|
|
|
|
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2010-01-12 19:22:03 +00:00
|
|
|
/* Forward tail for residual counts 47, 39, 31, 23, 15 and 7.
   Each label copies one 8-byte chunk at a negative offset from
   %eax (source) to the same offset from %edx (destination) and falls
   through.  The final 7 bytes are moved as 4 + 2 + 1.  */
L(fwd_write_47bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq -47(%eax), %xmm0
|
|
|
|
movq %xmm0, -47(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(fwd_write_39bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq -39(%eax), %xmm0
|
|
|
|
movq %xmm0, -39(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(fwd_write_31bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq -31(%eax), %xmm0
|
|
|
|
movq %xmm0, -31(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(fwd_write_23bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq -23(%eax), %xmm0
|
|
|
|
movq %xmm0, -23(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(fwd_write_15bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq -15(%eax), %xmm0
|
|
|
|
movq %xmm0, -15(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(fwd_write_7bytes):
|
|
|
|
movl -7(%eax), %ecx
|
|
|
|
movl %ecx, -7(%edx)
|
2011-10-23 18:28:26 +00:00
|
|
|
/* Load the last word and byte before storing either; the byte load
   overwrites %eax, so it must be the last use of the source pointer.  */
movzwl -3(%eax), %ecx
|
|
|
|
movzbl -1(%eax), %eax
|
|
|
|
movw %cx, -3(%edx)
|
|
|
|
movb %al, -1(%edx)
|
2022-02-10 14:20:54 +00:00
|
|
|
/* Return value: the advanced destination pointer %edx for mempcpy,
   otherwise the DEST argument reloaded from the stack.  */
# ifdef USE_AS_MEMPCPY
|
2011-10-23 18:28:26 +00:00
|
|
|
movl %edx, %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# else
|
2011-10-23 18:28:26 +00:00
|
|
|
movl DEST(%esp), %eax
|
|
|
|
# endif
|
|
|
|
/* RETURN is a macro defined elsewhere in the file.  */
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2011-10-23 18:28:26 +00:00
|
|
|
L(fwd_write_43bytes):
|
|
|
|
movq -43(%eax), %xmm0
|
|
|
|
movq %xmm0, -43(%edx)
|
|
|
|
L(fwd_write_35bytes):
|
|
|
|
movq -35(%eax), %xmm0
|
|
|
|
movq %xmm0, -35(%edx)
|
|
|
|
L(fwd_write_27bytes):
|
|
|
|
movq -27(%eax), %xmm0
|
|
|
|
movq %xmm0, -27(%edx)
|
|
|
|
L(fwd_write_19bytes):
|
|
|
|
movq -19(%eax), %xmm0
|
|
|
|
movq %xmm0, -19(%edx)
|
|
|
|
L(fwd_write_11bytes):
|
|
|
|
movq -11(%eax), %xmm0
|
|
|
|
movq %xmm0, -11(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(fwd_write_3bytes):
|
|
|
|
movzwl -3(%eax), %ecx
|
|
|
|
movzbl -1(%eax), %eax
|
|
|
|
movw %cx, -3(%edx)
|
|
|
|
movb %al, -1(%edx)
|
2022-02-10 14:20:54 +00:00
|
|
|
# ifdef USE_AS_MEMPCPY
|
2010-01-12 19:22:03 +00:00
|
|
|
movl %edx, %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# else
|
2010-01-12 19:22:03 +00:00
|
|
|
movl DEST(%esp), %eax
|
|
|
|
# endif
|
2011-10-23 18:28:26 +00:00
|
|
|
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2011-10-23 18:28:26 +00:00
|
|
|
/* Aligned forward tail for residual counts 40, 24, 8 and 0.
   The 16-byte steps use movdqa, which faults on addresses that are not
   16-byte aligned, so %eax - 40 and %edx - 40 (and the -24 addresses)
   must be aligned on entry.  NOTE(review): the dispatch site that
   guarantees this is outside this view; confirm.  The last 8 bytes use
   movq, which has no alignment requirement.  */
L(fwd_write_40bytes_align):
|
|
|
|
movdqa -40(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -40(%edx)
|
|
|
|
L(fwd_write_24bytes_align):
|
|
|
|
movdqa -24(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -24(%edx)
|
|
|
|
L(fwd_write_8bytes_align):
|
|
|
|
movq -8(%eax), %xmm0
|
|
|
|
movq %xmm0, -8(%edx)
|
|
|
|
L(fwd_write_0bytes_align):
|
2022-02-10 14:20:54 +00:00
|
|
|
/* Return value: the advanced destination pointer %edx for mempcpy,
   otherwise the DEST argument reloaded from the stack.  */
# ifdef USE_AS_MEMPCPY
|
2011-10-23 18:28:26 +00:00
|
|
|
movl %edx, %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# else
|
2011-10-23 18:28:26 +00:00
|
|
|
movl DEST(%esp), %eax
|
|
|
|
# endif
|
|
|
|
/* RETURN is a macro defined elsewhere in the file.  */
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2011-10-23 18:28:26 +00:00
|
|
|
/* Aligned forward tail for residual counts 32 and 16: one or two
   16-byte movdqa copies at negative offsets from %eax (source) and
   %edx (destination).  movdqa faults on addresses that are not
   16-byte aligned, so both pointers must be 16-byte aligned here.  */
L(fwd_write_32bytes_align):
|
|
|
|
movdqa -32(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -32(%edx)
|
|
|
|
L(fwd_write_16bytes_align):
|
|
|
|
movdqa -16(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -16(%edx)
|
2022-02-10 14:20:54 +00:00
|
|
|
/* Return value: the advanced destination pointer %edx for mempcpy,
   otherwise the DEST argument reloaded from the stack.  */
# ifdef USE_AS_MEMPCPY
|
2011-10-23 18:28:26 +00:00
|
|
|
movl %edx, %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# else
|
2011-10-23 18:28:26 +00:00
|
|
|
movl DEST(%esp), %eax
|
|
|
|
# endif
|
|
|
|
/* RETURN is a macro defined elsewhere in the file.  */
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2011-10-23 18:28:26 +00:00
|
|
|
L(fwd_write_5bytes_align):
|
|
|
|
movl -5(%eax), %ecx
|
|
|
|
movl -4(%eax), %eax
|
|
|
|
movl %ecx, -5(%edx)
|
|
|
|
movl %eax, -4(%edx)
|
2022-02-10 14:20:54 +00:00
|
|
|
# ifdef USE_AS_MEMPCPY
|
2011-10-23 18:28:26 +00:00
|
|
|
movl %edx, %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# else
|
2011-10-23 18:28:26 +00:00
|
|
|
movl DEST(%esp), %eax
|
|
|
|
# endif
|
|
|
|
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2011-10-23 18:28:26 +00:00
|
|
|
L(fwd_write_45bytes_align):
|
|
|
|
movdqa -45(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -45(%edx)
|
|
|
|
L(fwd_write_29bytes_align):
|
|
|
|
movdqa -29(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -29(%edx)
|
|
|
|
L(fwd_write_13bytes_align):
|
|
|
|
movq -13(%eax), %xmm0
|
|
|
|
movq %xmm0, -13(%edx)
|
|
|
|
movl -5(%eax), %ecx
|
|
|
|
movl %ecx, -5(%edx)
|
|
|
|
movzbl -1(%eax), %ecx
|
|
|
|
movb %cl, -1(%edx)
|
2022-02-10 14:20:54 +00:00
|
|
|
# ifdef USE_AS_MEMPCPY
|
2011-10-23 18:28:26 +00:00
|
|
|
movl %edx, %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# else
|
2011-10-23 18:28:26 +00:00
|
|
|
movl DEST(%esp), %eax
|
|
|
|
# endif
|
|
|
|
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2011-10-23 18:28:26 +00:00
|
|
|
L(fwd_write_37bytes_align):
|
|
|
|
movdqa -37(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -37(%edx)
|
|
|
|
L(fwd_write_21bytes_align):
|
|
|
|
movdqa -21(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -21(%edx)
|
|
|
|
movl -5(%eax), %ecx
|
|
|
|
movl %ecx, -5(%edx)
|
|
|
|
movzbl -1(%eax), %ecx
|
|
|
|
movb %cl, -1(%edx)
|
2022-02-10 14:20:54 +00:00
|
|
|
# ifdef USE_AS_MEMPCPY
|
2011-10-23 18:28:26 +00:00
|
|
|
movl %edx, %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# else
|
2011-10-23 18:28:26 +00:00
|
|
|
movl DEST(%esp), %eax
|
|
|
|
# endif
|
|
|
|
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2011-10-23 18:28:26 +00:00
|
|
|
L(fwd_write_41bytes_align):
|
|
|
|
movdqa -41(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -41(%edx)
|
|
|
|
L(fwd_write_25bytes_align):
|
|
|
|
movdqa -25(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -25(%edx)
|
|
|
|
L(fwd_write_9bytes_align):
|
|
|
|
movq -9(%eax), %xmm0
|
|
|
|
movq %xmm0, -9(%edx)
|
|
|
|
L(fwd_write_1bytes_align):
|
|
|
|
movzbl -1(%eax), %ecx
|
|
|
|
movb %cl, -1(%edx)
|
2022-02-10 14:20:54 +00:00
|
|
|
# ifdef USE_AS_MEMPCPY
|
2011-10-23 18:28:26 +00:00
|
|
|
movl %edx, %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# else
|
2011-10-23 18:28:26 +00:00
|
|
|
movl DEST(%esp), %eax
|
|
|
|
# endif
|
|
|
|
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2011-10-23 18:28:26 +00:00
|
|
|
L(fwd_write_33bytes_align):
|
|
|
|
movdqa -33(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -33(%edx)
|
|
|
|
L(fwd_write_17bytes_align):
|
|
|
|
movdqa -17(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -17(%edx)
|
|
|
|
movzbl -1(%eax), %ecx
|
|
|
|
movb %cl, -1(%edx)
|
2022-02-10 14:20:54 +00:00
|
|
|
# ifdef USE_AS_MEMPCPY
|
2011-10-23 18:28:26 +00:00
|
|
|
movl %edx, %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# else
|
2011-10-23 18:28:26 +00:00
|
|
|
movl DEST(%esp), %eax
|
|
|
|
# endif
|
|
|
|
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2011-10-23 18:28:26 +00:00
|
|
|
L(fwd_write_46bytes_align):
|
|
|
|
movdqa -46(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -46(%edx)
|
|
|
|
L(fwd_write_30bytes_align):
|
|
|
|
movdqa -30(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -30(%edx)
|
|
|
|
L(fwd_write_14bytes_align):
|
|
|
|
movq -14(%eax), %xmm0
|
|
|
|
movq %xmm0, -14(%edx)
|
|
|
|
L(fwd_write_6bytes_align):
|
|
|
|
movl -6(%eax), %ecx
|
|
|
|
movl %ecx, -6(%edx)
|
|
|
|
movzwl -2(%eax), %ecx
|
|
|
|
movw %cx, -2(%edx)
|
2022-02-10 14:20:54 +00:00
|
|
|
# ifdef USE_AS_MEMPCPY
|
2011-10-23 18:28:26 +00:00
|
|
|
movl %edx, %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# else
|
2011-10-23 18:28:26 +00:00
|
|
|
movl DEST(%esp), %eax
|
|
|
|
# endif
|
|
|
|
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2011-10-23 18:28:26 +00:00
|
|
|
L(fwd_write_38bytes_align):
|
|
|
|
movdqa -38(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -38(%edx)
|
|
|
|
L(fwd_write_22bytes_align):
|
|
|
|
movdqa -22(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -22(%edx)
|
|
|
|
movl -6(%eax), %ecx
|
|
|
|
movl %ecx, -6(%edx)
|
|
|
|
movzwl -2(%eax), %ecx
|
|
|
|
movw %cx, -2(%edx)
|
2022-02-10 14:20:54 +00:00
|
|
|
# ifdef USE_AS_MEMPCPY
|
2011-10-23 18:28:26 +00:00
|
|
|
movl %edx, %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# else
|
2011-10-23 18:28:26 +00:00
|
|
|
movl DEST(%esp), %eax
|
|
|
|
# endif
|
|
|
|
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2011-10-23 18:28:26 +00:00
|
|
|
L(fwd_write_42bytes_align):
|
|
|
|
movdqa -42(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -42(%edx)
|
|
|
|
L(fwd_write_26bytes_align):
|
|
|
|
movdqa -26(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -26(%edx)
|
|
|
|
L(fwd_write_10bytes_align):
|
|
|
|
movq -10(%eax), %xmm0
|
|
|
|
movq %xmm0, -10(%edx)
|
|
|
|
L(fwd_write_2bytes_align):
|
|
|
|
movzwl -2(%eax), %ecx
|
|
|
|
movw %cx, -2(%edx)
|
2022-02-10 14:20:54 +00:00
|
|
|
# ifdef USE_AS_MEMPCPY
|
2011-10-23 18:28:26 +00:00
|
|
|
movl %edx, %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# else
|
2011-10-23 18:28:26 +00:00
|
|
|
movl DEST(%esp), %eax
|
|
|
|
# endif
|
|
|
|
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2011-10-23 18:28:26 +00:00
|
|
|
L(fwd_write_34bytes_align):
|
|
|
|
movdqa -34(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -34(%edx)
|
|
|
|
L(fwd_write_18bytes_align):
|
|
|
|
movdqa -18(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -18(%edx)
|
|
|
|
movzwl -2(%eax), %ecx
|
|
|
|
movw %cx, -2(%edx)
|
2022-02-10 14:20:54 +00:00
|
|
|
# ifdef USE_AS_MEMPCPY
|
2011-10-23 18:28:26 +00:00
|
|
|
movl %edx, %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# else
|
2011-10-23 18:28:26 +00:00
|
|
|
movl DEST(%esp), %eax
|
|
|
|
# endif
|
|
|
|
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2011-10-23 18:28:26 +00:00
|
|
|
L(fwd_write_47bytes_align):
|
|
|
|
movdqa -47(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -47(%edx)
|
|
|
|
L(fwd_write_31bytes_align):
|
|
|
|
movdqa -31(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -31(%edx)
|
|
|
|
L(fwd_write_15bytes_align):
|
|
|
|
movq -15(%eax), %xmm0
|
|
|
|
movq %xmm0, -15(%edx)
|
|
|
|
L(fwd_write_7bytes_align):
|
|
|
|
movl -7(%eax), %ecx
|
|
|
|
movl %ecx, -7(%edx)
|
|
|
|
movzwl -3(%eax), %ecx
|
|
|
|
movzbl -1(%eax), %eax
|
|
|
|
movw %cx, -3(%edx)
|
|
|
|
movb %al, -1(%edx)
|
2022-02-10 14:20:54 +00:00
|
|
|
# ifdef USE_AS_MEMPCPY
|
2011-10-23 18:28:26 +00:00
|
|
|
movl %edx, %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# else
|
2011-10-23 18:28:26 +00:00
|
|
|
movl DEST(%esp), %eax
|
|
|
|
# endif
|
|
|
|
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2011-10-23 18:28:26 +00:00
|
|
|
L(fwd_write_39bytes_align):
|
|
|
|
movdqa -39(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -39(%edx)
|
|
|
|
L(fwd_write_23bytes_align):
|
|
|
|
movdqa -23(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -23(%edx)
|
|
|
|
movl -7(%eax), %ecx
|
|
|
|
movl %ecx, -7(%edx)
|
|
|
|
movzwl -3(%eax), %ecx
|
|
|
|
movzbl -1(%eax), %eax
|
|
|
|
movw %cx, -3(%edx)
|
|
|
|
movb %al, -1(%edx)
|
2022-02-10 14:20:54 +00:00
|
|
|
# ifdef USE_AS_MEMPCPY
|
2011-10-23 18:28:26 +00:00
|
|
|
movl %edx, %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# else
|
2011-10-23 18:28:26 +00:00
|
|
|
movl DEST(%esp), %eax
|
|
|
|
# endif
|
|
|
|
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2011-10-23 18:28:26 +00:00
|
|
|
L(fwd_write_43bytes_align):
|
|
|
|
movdqa -43(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -43(%edx)
|
|
|
|
L(fwd_write_27bytes_align):
|
|
|
|
movdqa -27(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -27(%edx)
|
|
|
|
L(fwd_write_11bytes_align):
|
|
|
|
movq -11(%eax), %xmm0
|
|
|
|
movq %xmm0, -11(%edx)
|
|
|
|
L(fwd_write_3bytes_align):
|
|
|
|
movzwl -3(%eax), %ecx
|
|
|
|
movzbl -1(%eax), %eax
|
|
|
|
movw %cx, -3(%edx)
|
|
|
|
movb %al, -1(%edx)
|
2022-02-10 14:20:54 +00:00
|
|
|
# ifdef USE_AS_MEMPCPY
|
2011-10-23 18:28:26 +00:00
|
|
|
movl %edx, %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# else
|
2011-10-23 18:28:26 +00:00
|
|
|
movl DEST(%esp), %eax
|
|
|
|
# endif
|
|
|
|
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2011-10-23 18:28:26 +00:00
|
|
|
L(fwd_write_35bytes_align):
|
|
|
|
movdqa -35(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -35(%edx)
|
|
|
|
L(fwd_write_19bytes_align):
|
|
|
|
movdqa -19(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -19(%edx)
|
|
|
|
movzwl -3(%eax), %ecx
|
|
|
|
movzbl -1(%eax), %eax
|
|
|
|
movw %cx, -3(%edx)
|
|
|
|
movb %al, -1(%edx)
|
2022-02-10 14:20:54 +00:00
|
|
|
# ifdef USE_AS_MEMPCPY
|
2011-10-23 18:28:26 +00:00
|
|
|
movl %edx, %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# else
|
2011-10-23 18:28:26 +00:00
|
|
|
movl DEST(%esp), %eax
|
|
|
|
# endif
|
|
|
|
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2011-10-23 18:28:26 +00:00
|
|
|
L(fwd_write_44bytes_align):
|
|
|
|
movdqa -44(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -44(%edx)
|
|
|
|
L(fwd_write_28bytes_align):
|
|
|
|
movdqa -28(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -28(%edx)
|
|
|
|
L(fwd_write_12bytes_align):
|
|
|
|
movq -12(%eax), %xmm0
|
|
|
|
movq %xmm0, -12(%edx)
|
|
|
|
L(fwd_write_4bytes_align):
|
|
|
|
movl -4(%eax), %ecx
|
|
|
|
movl %ecx, -4(%edx)
|
2022-02-10 14:20:54 +00:00
|
|
|
# ifdef USE_AS_MEMPCPY
|
2011-10-23 18:28:26 +00:00
|
|
|
movl %edx, %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# else
|
2011-10-23 18:28:26 +00:00
|
|
|
movl DEST(%esp), %eax
|
|
|
|
# endif
|
|
|
|
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2011-10-23 18:28:26 +00:00
|
|
|
L(fwd_write_36bytes_align):
|
|
|
|
movdqa -36(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -36(%edx)
|
|
|
|
L(fwd_write_20bytes_align):
|
|
|
|
movdqa -20(%eax), %xmm0
|
|
|
|
movdqa %xmm0, -20(%edx)
|
|
|
|
movl -4(%eax), %ecx
|
|
|
|
movl %ecx, -4(%edx)
|
2022-02-10 14:20:54 +00:00
|
|
|
# ifdef USE_AS_MEMPCPY
|
2011-10-23 18:28:26 +00:00
|
|
|
movl %edx, %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# else
|
2011-10-23 18:28:26 +00:00
|
|
|
movl DEST(%esp), %eax
|
|
|
|
# endif
|
2010-02-25 02:20:57 +00:00
|
|
|
RETURN_END
|
2010-01-12 19:22:03 +00:00
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
CFI_PUSH (%edi)
|
|
|
|
|
|
|
|
.p2align 4
|
2010-01-12 19:22:03 +00:00
|
|
|
/* Entry of the non-temporal copy path.  Copies one 16-byte block with
   an unaligned load and a movntdq store (which requires %edx to be
   16-byte aligned), advances both pointers by 16 and sets
   %ecx = %ecx - 0x90: 0x10 for the block just copied plus a 0x80 bias
   so the loop below can branch on the flags of its own "sub $0x80".
   NOTE(review): the condition that selects this path is outside this
   view.  */
L(large_page):
|
|
|
|
movdqu (%eax), %xmm1
|
2012-03-30 20:45:27 +00:00
|
|
|
# ifdef USE_AS_MEMMOVE
|
|
|
|
/* memmove variant: store %xmm0 at DEST, after the first source block
   has been read.  NOTE(review): %xmm0 presumably holds the first
   16 source bytes loaded before this label; confirm.  */
movl DEST+4(%esp), %edi
|
|
|
|
movdqu %xmm0, (%edi)
|
|
|
|
# endif
|
2010-01-12 19:22:03 +00:00
|
|
|
lea 16(%eax), %eax
|
|
|
|
movntdq %xmm1, (%edx)
|
|
|
|
lea 16(%edx), %edx
|
|
|
|
lea -0x90(%ecx), %ecx
|
|
|
|
/* POP is a macro defined elsewhere; presumably restores %edi, which
   the loop below does not use.  */
POP (%edi)
|
2012-03-30 20:45:27 +00:00
|
|
|
|
|
|
|
.p2align 4
|
2010-01-12 19:22:03 +00:00
|
|
|
/* Non-temporal copy loop: 128 bytes per iteration, eight unaligned
   loads into %xmm0-%xmm7 followed by eight movntdq stores.
   %ecx enters biased by -0x80.  The "sub $0x80" is placed before the
   stores; movntdq and lea leave EFLAGS untouched, so the jae at the
   bottom tests the borrow of that subtraction.  */
L(large_page_loop):
|
|
|
|
movdqu (%eax), %xmm0
|
|
|
|
movdqu 0x10(%eax), %xmm1
|
|
|
|
movdqu 0x20(%eax), %xmm2
|
|
|
|
movdqu 0x30(%eax), %xmm3
|
|
|
|
movdqu 0x40(%eax), %xmm4
|
|
|
|
movdqu 0x50(%eax), %xmm5
|
|
|
|
movdqu 0x60(%eax), %xmm6
|
|
|
|
movdqu 0x70(%eax), %xmm7
|
|
|
|
lea 0x80(%eax), %eax
|
|
|
|
|
|
|
|
|
sub $0x80, %ecx
|
|
|
|
movntdq %xmm0, (%edx)
|
|
|
|
movntdq %xmm1, 0x10(%edx)
|
|
|
|
movntdq %xmm2, 0x20(%edx)
|
|
|
|
movntdq %xmm3, 0x30(%edx)
|
|
|
|
movntdq %xmm4, 0x40(%edx)
|
|
|
|
movntdq %xmm5, 0x50(%edx)
|
|
|
|
movntdq %xmm6, 0x60(%edx)
|
|
|
|
movntdq %xmm7, 0x70(%edx)
|
|
|
|
lea 0x80(%edx), %edx
|
|
|
|
jae L(large_page_loop)
|
|
|
|
/* Loop exit: %ecx is in [-0x80, 0) and %ecx + 0x80 is the number of
   bytes still to copy.  The cmp sets the flags, the lea removes the
   bias without touching them, and jl skips the 64-byte step when
   fewer than 0x40 bytes remain.  */
cmp $-0x40, %ecx
|
|
|
|
lea 0x80(%ecx), %ecx
|
|
|
|
jl L(large_page_less_64bytes)
|
|
|
|
|
|
|
|
|
/* Copy one 64-byte step with non-temporal stores.  */
movdqu (%eax), %xmm0
|
|
|
|
movdqu 0x10(%eax), %xmm1
|
|
|
|
movdqu 0x20(%eax), %xmm2
|
|
|
|
movdqu 0x30(%eax), %xmm3
|
|
|
|
lea 0x40(%eax), %eax
|
|
|
|
|
|
|
|
|
movntdq %xmm0, (%edx)
|
|
|
|
movntdq %xmm1, 0x10(%edx)
|
|
|
|
movntdq %xmm2, 0x20(%edx)
|
|
|
|
movntdq %xmm3, 0x30(%edx)
|
|
|
|
lea 0x40(%edx), %edx
|
|
|
|
sub $0x40, %ecx
|
|
|
|
/* Fewer than 64 bytes remain on the non-temporal path; %ecx holds the
   unbiased remaining count.  Copy one 32-byte step if at least
   32 bytes remain (unsigned compare), then hand the rest to the
   forward jump table.  */
L(large_page_less_64bytes):
|
|
|
|
cmp $32, %ecx
|
2010-02-25 02:20:57 +00:00
|
|
|
jb L(large_page_less_32bytes)
|
2010-01-12 19:22:03 +00:00
|
|
|
movdqu (%eax), %xmm0
|
|
|
|
movdqu 0x10(%eax), %xmm1
|
|
|
|
lea 0x20(%eax), %eax
|
|
|
|
movntdq %xmm0, (%edx)
|
|
|
|
movntdq %xmm1, 0x10(%edx)
|
|
|
|
lea 0x20(%edx), %edx
|
|
|
|
sub $0x20, %ecx
|
|
|
|
L(large_page_less_32bytes):
|
|
|
|
/* Advance both pointers by the residual count, because the jump-table
   tails address memory with negative offsets from %eax/%edx.  */
add %ecx, %edx
|
|
|
|
add %ecx, %eax
|
|
|
|
/* Order the preceding non-temporal (movntdq) stores before any later
   stores.  */
sfence
|
|
|
|
/* Dispatch on the residual count in %ecx (table entries are 4 bytes).  */
BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2010-01-12 19:22:03 +00:00
|
|
|
/* Backward tail for residual counts 44, 36, 28, 20, 12, 4 and 0.
   Offsets here are non-negative, so %eax/%edx address the lowest byte
   still to be copied.  Each label copies the highest outstanding
   8-byte chunk and falls through to the next lower one; the 4-byte
   entry finishes with a single 32-bit move.  */
L(bk_write_44bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq 36(%eax), %xmm0
|
|
|
|
movq %xmm0, 36(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(bk_write_36bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq 28(%eax), %xmm0
|
|
|
|
movq %xmm0, 28(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(bk_write_28bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq 20(%eax), %xmm0
|
|
|
|
movq %xmm0, 20(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(bk_write_20bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq 12(%eax), %xmm0
|
|
|
|
movq %xmm0, 12(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(bk_write_12bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq 4(%eax), %xmm0
|
|
|
|
movq %xmm0, 4(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(bk_write_4bytes):
|
|
|
|
movl (%eax), %ecx
|
|
|
|
movl %ecx, (%edx)
|
|
|
|
L(bk_write_0bytes):
|
|
|
|
/* Return value: DEST reloaded from the stack, plus LEN for mempcpy.  */
movl DEST(%esp), %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# ifdef USE_AS_MEMPCPY
|
2010-01-12 19:22:03 +00:00
|
|
|
movl LEN(%esp), %ecx
|
|
|
|
add %ecx, %eax
|
|
|
|
# endif
|
|
|
|
/* RETURN is a macro defined elsewhere in the file.  */
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2011-10-23 18:28:26 +00:00
|
|
|
/* Backward tail for residual counts 40, 32, 24, 16 and 8.
   Offsets here are non-negative, so %eax/%edx address the lowest byte
   still to be copied.  Each label copies the highest outstanding
   8-byte chunk and falls through to the next lower one.  */
L(bk_write_40bytes):
|
|
|
|
movq 32(%eax), %xmm0
|
|
|
|
movq %xmm0, 32(%edx)
|
|
|
|
L(bk_write_32bytes):
|
|
|
|
movq 24(%eax), %xmm0
|
|
|
|
movq %xmm0, 24(%edx)
|
|
|
|
L(bk_write_24bytes):
|
|
|
|
movq 16(%eax), %xmm0
|
|
|
|
movq %xmm0, 16(%edx)
|
|
|
|
L(bk_write_16bytes):
|
|
|
|
movq 8(%eax), %xmm0
|
|
|
|
movq %xmm0, 8(%edx)
|
|
|
|
L(bk_write_8bytes):
|
|
|
|
movq (%eax), %xmm0
|
|
|
|
movq %xmm0, (%edx)
|
|
|
|
/* Return value: DEST reloaded from the stack, plus LEN for mempcpy.  */
movl DEST(%esp), %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# ifdef USE_AS_MEMPCPY
|
2011-10-23 18:28:26 +00:00
|
|
|
movl LEN(%esp), %ecx
|
|
|
|
add %ecx, %eax
|
|
|
|
# endif
|
|
|
|
/* RETURN is a macro defined elsewhere in the file.  */
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2010-01-12 19:22:03 +00:00
|
|
|
L(bk_write_45bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq 37(%eax), %xmm0
|
|
|
|
movq %xmm0, 37(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(bk_write_37bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq 29(%eax), %xmm0
|
|
|
|
movq %xmm0, 29(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(bk_write_29bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq 21(%eax), %xmm0
|
|
|
|
movq %xmm0, 21(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(bk_write_21bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq 13(%eax), %xmm0
|
|
|
|
movq %xmm0, 13(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(bk_write_13bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq 5(%eax), %xmm0
|
|
|
|
movq %xmm0, 5(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(bk_write_5bytes):
|
|
|
|
movl 1(%eax), %ecx
|
|
|
|
movl %ecx, 1(%edx)
|
|
|
|
L(bk_write_1bytes):
|
|
|
|
movzbl (%eax), %ecx
|
|
|
|
movb %cl, (%edx)
|
|
|
|
movl DEST(%esp), %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# ifdef USE_AS_MEMPCPY
|
2010-01-12 19:22:03 +00:00
|
|
|
movl LEN(%esp), %ecx
|
|
|
|
add %ecx, %eax
|
|
|
|
# endif
|
|
|
|
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2011-10-23 18:28:26 +00:00
|
|
|
L(bk_write_41bytes):
|
|
|
|
movq 33(%eax), %xmm0
|
|
|
|
movq %xmm0, 33(%edx)
|
|
|
|
L(bk_write_33bytes):
|
|
|
|
movq 25(%eax), %xmm0
|
|
|
|
movq %xmm0, 25(%edx)
|
|
|
|
L(bk_write_25bytes):
|
|
|
|
movq 17(%eax), %xmm0
|
|
|
|
movq %xmm0, 17(%edx)
|
|
|
|
L(bk_write_17bytes):
|
|
|
|
movq 9(%eax), %xmm0
|
|
|
|
movq %xmm0, 9(%edx)
|
|
|
|
L(bk_write_9bytes):
|
|
|
|
movq 1(%eax), %xmm0
|
|
|
|
movq %xmm0, 1(%edx)
|
|
|
|
movzbl (%eax), %ecx
|
|
|
|
movb %cl, (%edx)
|
|
|
|
movl DEST(%esp), %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# ifdef USE_AS_MEMPCPY
|
2011-10-23 18:28:26 +00:00
|
|
|
movl LEN(%esp), %ecx
|
|
|
|
add %ecx, %eax
|
|
|
|
# endif
|
|
|
|
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2010-01-12 19:22:03 +00:00
|
|
|
L(bk_write_46bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq 38(%eax), %xmm0
|
|
|
|
movq %xmm0, 38(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(bk_write_38bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq 30(%eax), %xmm0
|
|
|
|
movq %xmm0, 30(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(bk_write_30bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq 22(%eax), %xmm0
|
|
|
|
movq %xmm0, 22(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(bk_write_22bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq 14(%eax), %xmm0
|
|
|
|
movq %xmm0, 14(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(bk_write_14bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq 6(%eax), %xmm0
|
|
|
|
movq %xmm0, 6(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(bk_write_6bytes):
|
|
|
|
movl 2(%eax), %ecx
|
|
|
|
movl %ecx, 2(%edx)
|
2011-10-23 18:28:26 +00:00
|
|
|
movzwl (%eax), %ecx
|
|
|
|
movw %cx, (%edx)
|
|
|
|
movl DEST(%esp), %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# ifdef USE_AS_MEMPCPY
|
2011-10-23 18:28:26 +00:00
|
|
|
movl LEN(%esp), %ecx
|
|
|
|
add %ecx, %eax
|
|
|
|
# endif
|
|
|
|
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2011-10-23 18:28:26 +00:00
|
|
|
L(bk_write_42bytes):
|
|
|
|
movq 34(%eax), %xmm0
|
|
|
|
movq %xmm0, 34(%edx)
|
|
|
|
L(bk_write_34bytes):
|
|
|
|
movq 26(%eax), %xmm0
|
|
|
|
movq %xmm0, 26(%edx)
|
|
|
|
L(bk_write_26bytes):
|
|
|
|
movq 18(%eax), %xmm0
|
|
|
|
movq %xmm0, 18(%edx)
|
|
|
|
L(bk_write_18bytes):
|
|
|
|
movq 10(%eax), %xmm0
|
|
|
|
movq %xmm0, 10(%edx)
|
|
|
|
L(bk_write_10bytes):
|
|
|
|
movq 2(%eax), %xmm0
|
|
|
|
movq %xmm0, 2(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(bk_write_2bytes):
|
|
|
|
movzwl (%eax), %ecx
|
|
|
|
movw %cx, (%edx)
|
|
|
|
movl DEST(%esp), %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# ifdef USE_AS_MEMPCPY
|
2010-01-12 19:22:03 +00:00
|
|
|
movl LEN(%esp), %ecx
|
|
|
|
add %ecx, %eax
|
|
|
|
# endif
|
|
|
|
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
.p2align 4
|
2010-01-12 19:22:03 +00:00
|
|
|
L(bk_write_47bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq 39(%eax), %xmm0
|
|
|
|
movq %xmm0, 39(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(bk_write_39bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq 31(%eax), %xmm0
|
|
|
|
movq %xmm0, 31(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(bk_write_31bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq 23(%eax), %xmm0
|
|
|
|
movq %xmm0, 23(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(bk_write_23bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq 15(%eax), %xmm0
|
|
|
|
movq %xmm0, 15(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(bk_write_15bytes):
|
2011-10-23 18:28:26 +00:00
|
|
|
movq 7(%eax), %xmm0
|
|
|
|
movq %xmm0, 7(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(bk_write_7bytes):
|
|
|
|
movl 3(%eax), %ecx
|
|
|
|
movl %ecx, 3(%edx)
|
2011-10-23 18:28:26 +00:00
|
|
|
movzwl 1(%eax), %ecx
|
|
|
|
movw %cx, 1(%edx)
|
|
|
|
movzbl (%eax), %eax
|
|
|
|
movb %al, (%edx)
|
|
|
|
movl DEST(%esp), %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
# ifdef USE_AS_MEMPCPY
|
2011-10-23 18:28:26 +00:00
|
|
|
movl LEN(%esp), %ecx
|
|
|
|
add %ecx, %eax
|
|
|
|
# endif
|
|
|
|
RETURN
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
/* Backward tail copy for 43, 35, 27, 19, 11 or 3 remaining bytes.
   EAX is the source base and EDX the destination base.  Each label
   moves its topmost 8 bytes through XMM0 and then falls through to
   the label for 8 fewer bytes, so higher addresses are written
   before lower ones.  */
.p2align 4
|
2011-10-23 18:28:26 +00:00
|
|
|
L(bk_write_43bytes):
|
|
|
|
/* Bytes 35..42.  */
movq 35(%eax), %xmm0
|
|
|
|
movq %xmm0, 35(%edx)
|
|
|
|
L(bk_write_35bytes):
|
|
|
|
/* Bytes 27..34.  */
movq 27(%eax), %xmm0
|
|
|
|
movq %xmm0, 27(%edx)
|
|
|
|
L(bk_write_27bytes):
|
|
|
|
/* Bytes 19..26.  */
movq 19(%eax), %xmm0
|
|
|
|
movq %xmm0, 19(%edx)
|
|
|
|
L(bk_write_19bytes):
|
|
|
|
/* Bytes 11..18.  */
movq 11(%eax), %xmm0
|
|
|
|
movq %xmm0, 11(%edx)
|
|
|
|
L(bk_write_11bytes):
|
|
|
|
/* Bytes 3..10.  */
movq 3(%eax), %xmm0
|
|
|
|
movq %xmm0, 3(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
/* Final 3 bytes: a 2-byte move at offset 1 and a 1-byte move at
   offset 0, using ECX/EAX as scratch.  */
L(bk_write_3bytes):
|
|
|
|
movzwl 1(%eax), %ecx
|
|
|
|
movw %cx, 1(%edx)
|
|
|
|
/* This load overwrites EAX (the source base); it is the last read
   through that pointer before EAX is reloaded below.  */
movzbl (%eax), %eax
|
|
|
|
movb %al, (%edx)
|
|
|
|
/* EAX = value of the DEST stack slot.  DEST is defined outside this
   chunk; presumably the caller's destination argument, returned in
   EAX -- confirm.  */
movl DEST(%esp), %eax
|
2022-02-10 14:20:54 +00:00
|
|
|
/* When built as mempcpy, add the LEN stack slot to the result.  */
# ifdef USE_AS_MEMPCPY
|
2010-01-12 19:22:03 +00:00
|
|
|
movl LEN(%esp), %ecx
|
|
|
|
add %ecx, %eax
|
|
|
|
# endif
|
|
|
|
/* RETURN_END is a macro defined outside this chunk; unlike the
   sibling tail routines, which use RETURN, this last one uses the
   _END variant.  */
RETURN_END
|
|
|
|
|
|
|
|
|
|
|
|
/* Jump tables live in the allocatable, non-writable data section
   .rodata.ssse3; .pushsection saves the current section so that the
   matching .popsection can return to it.  */
.pushsection .rodata.ssse3,"a",@progbits
|
2012-03-30 20:45:27 +00:00
|
|
|
/* Align the table to 4 bytes, the size of one .int entry.  */
.p2align 2
|
2010-01-12 19:22:03 +00:00
|
|
|
/* 48-entry table: entry N (N = 0..47) refers to L(fwd_write_Nbytes).
   Each entry is built by JMPTBL, a macro defined outside this chunk
   (presumably it encodes the target relative to the table label --
   confirm).  */
L(table_48bytes_fwd):
|
|
|
|
.int JMPTBL (L(fwd_write_0bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_1bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_2bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_3bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_4bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_5bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_6bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_7bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_8bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_9bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_10bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_11bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_12bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_13bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_14bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_15bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_16bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_17bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_18bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_19bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_20bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_21bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_22bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_23bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_24bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_25bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_26bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_27bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_28bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_29bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_30bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_31bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_32bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_33bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_34bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_35bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_36bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_37bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_38bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_39bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_40bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_41bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_42bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_43bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_44bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_45bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_46bytes), L(table_48bytes_fwd))
|
|
|
|
.int JMPTBL (L(fwd_write_47bytes), L(table_48bytes_fwd))
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
/* Align the table to 4 bytes, the size of one .int entry.  */
.p2align 2
|
2011-10-23 18:28:26 +00:00
|
|
|
/* 48-entry table: entry N (N = 0..47) refers to
   L(fwd_write_Nbytes_align).  Each entry is built by JMPTBL, a macro
   defined outside this chunk (presumably it encodes the target
   relative to the table label -- confirm).  */
L(table_48bytes_fwd_align):
|
|
|
|
.int JMPTBL (L(fwd_write_0bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_1bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_2bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_3bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_4bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_5bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_6bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_7bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_8bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_9bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_10bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_11bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_12bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_13bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_14bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_15bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_16bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_17bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_18bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_19bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_20bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_21bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_22bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_23bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_24bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_25bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_26bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_27bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_28bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_29bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_30bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_31bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_32bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_33bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_34bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_35bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_36bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_37bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_38bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_39bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_40bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_41bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_42bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_43bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_44bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_45bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_46bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
.int JMPTBL (L(fwd_write_47bytes_align), L(table_48bytes_fwd_align))
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
/* Align the table to 4 bytes, the size of one .int entry.  */
.p2align 2
|
2010-01-12 19:22:03 +00:00
|
|
|
/* 16-entry table: entry K (K = 0..15) refers to L(shl_K).  The
   dispatch site is outside this chunk; presumably K is a 0..15
   byte misalignment -- confirm there.  Each entry is built by
   JMPTBL, a macro defined outside this chunk.  */
L(shl_table):
|
|
|
|
.int JMPTBL (L(shl_0), L(shl_table))
|
|
|
|
.int JMPTBL (L(shl_1), L(shl_table))
|
|
|
|
.int JMPTBL (L(shl_2), L(shl_table))
|
|
|
|
.int JMPTBL (L(shl_3), L(shl_table))
|
|
|
|
.int JMPTBL (L(shl_4), L(shl_table))
|
|
|
|
.int JMPTBL (L(shl_5), L(shl_table))
|
|
|
|
.int JMPTBL (L(shl_6), L(shl_table))
|
|
|
|
.int JMPTBL (L(shl_7), L(shl_table))
|
|
|
|
.int JMPTBL (L(shl_8), L(shl_table))
|
|
|
|
.int JMPTBL (L(shl_9), L(shl_table))
|
|
|
|
.int JMPTBL (L(shl_10), L(shl_table))
|
|
|
|
.int JMPTBL (L(shl_11), L(shl_table))
|
|
|
|
.int JMPTBL (L(shl_12), L(shl_table))
|
|
|
|
.int JMPTBL (L(shl_13), L(shl_table))
|
|
|
|
.int JMPTBL (L(shl_14), L(shl_table))
|
|
|
|
.int JMPTBL (L(shl_15), L(shl_table))
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
/* Align the table to 4 bytes, the size of one .int entry.  */
.p2align 2
|
2010-01-12 19:22:03 +00:00
|
|
|
/* 48-entry table for the backward copy tail: entry N (N = 0..47)
   refers to L(bk_write_Nbytes).  L(bk_write_less32bytes_2) dispatches
   through it with BRANCH_TO_JMPTBL_ENTRY, using ECX as the index and
   a scale of 4.  Each entry is built by JMPTBL, a macro defined
   outside this chunk.  */
L(table_48_bytes_bwd):
|
|
|
|
.int JMPTBL (L(bk_write_0bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_1bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_2bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_3bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_4bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_5bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_6bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_7bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_8bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_9bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_10bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_11bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_12bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_13bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_14bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_15bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_16bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_17bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_18bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_19bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_20bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_21bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_22bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_23bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_24bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_25bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_26bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_27bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_28bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_29bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_30bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_31bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_32bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_33bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_34bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_35bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_36bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_37bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_38bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_39bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_40bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_41bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_42bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_43bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_44bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_45bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_46bytes), L(table_48_bytes_bwd))
|
|
|
|
.int JMPTBL (L(bk_write_47bytes), L(table_48_bytes_bwd))
|
|
|
|
|
|
|
|
/* Leave .rodata.ssse3 and return to the section that was current
   before the matching .pushsection.  */
.popsection
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
# ifdef USE_AS_MEMMOVE
|
|
|
|
/* Backward copy, assembled only when USE_AS_MEMMOVE is defined.
   On entry EAX is the source start, EDX the destination start and
   ECX the byte count.  The code turns both pointers into one-past-
   the-end pointers (EDI for the source, EDX for the destination) and
   copies from high addresses down to low ones, with ECX tracking the
   bytes still to copy.  */
.p2align 4
|
2010-01-12 19:22:03 +00:00
|
|
|
L(copy_backward):
|
2012-03-30 20:45:27 +00:00
|
|
|
/* Save EDI; it is restored by the POP in L(bk_write_less32bytes).
   PUSH/POP are macros defined outside this chunk.  */
PUSH (%edi)
|
|
|
|
movl %eax, %edi
|
2010-01-12 19:22:03 +00:00
|
|
|
|
|
|
/* EDX = destination start + length.  */
lea (%ecx,%edx,1),%edx
|
2012-03-30 20:45:27 +00:00
|
|
|
/* EDI = source start + length.  */
lea (%ecx,%edi,1),%edi
|
2010-01-12 19:22:03 +00:00
|
|
|
/* If the destination end is not 4-byte aligned, align it first.  */
testl $0x3, %edx
|
|
|
|
jnz L(bk_align)
|
|
|
|
|
|
|
|
|
|
/* Destination end is 4-byte aligned here.  */
L(bk_aligned_4):
|
|
|
|
cmp $64, %ecx
|
2010-02-25 02:20:57 +00:00
|
|
|
/* Unsigned compare: 64 or more bytes go to the 16-byte-aligned
   SSE loop.  */
jae L(bk_write_more64bytes)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
|
|
|
|
|
/* Fewer than 64 bytes remain.  */
L(bk_write_64bytesless):
|
|
|
|
cmp $32, %ecx
|
2010-02-25 02:20:57 +00:00
|
|
|
jb L(bk_write_less32bytes)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
|
|
|
|
|
/* Straight-line block, not a loop: it moves the 32 bytes just below
   the end pointers as four 8-byte moves through XMM0, highest first,
   then falls through to the tail dispatch.  */
L(bk_write_more32bytes):
|
|
|
|
/* Copy 32 bytes at a time. */
|
|
|
|
sub $32, %ecx
|
2012-03-30 20:45:27 +00:00
|
|
|
movq -8(%edi), %xmm0
|
2011-10-23 18:28:26 +00:00
|
|
|
|
|
movq %xmm0, -8(%edx)
|
2012-03-30 20:45:27 +00:00
|
|
|
movq -16(%edi), %xmm0
|
2011-10-23 18:28:26 +00:00
|
|
|
movq %xmm0, -16(%edx)
|
2012-03-30 20:45:27 +00:00
|
|
|
movq -24(%edi), %xmm0
|
2011-10-23 18:28:26 +00:00
|
|
|
movq %xmm0, -24(%edx)
|
2012-03-30 20:45:27 +00:00
|
|
|
movq -32(%edi), %xmm0
|
2011-10-23 18:28:26 +00:00
|
|
|
movq %xmm0, -32(%edx)
|
2010-01-12 19:22:03 +00:00
|
|
|
/* Step both end pointers down past the 32 bytes just copied.  */
sub $32, %edx
|
2012-03-30 20:45:27 +00:00
|
|
|
sub $32, %edi
|
2010-01-12 19:22:03 +00:00
|
|
|
|
|
|
|
|
|
/* Convert the end pointers back to start-of-remainder pointers, as
   the L(bk_write_Nbytes) routines address with positive offsets from
   EAX (source) and EDX (destination).  */
L(bk_write_less32bytes):
|
2012-03-30 20:45:27 +00:00
|
|
|
movl %edi, %eax
|
2010-01-12 19:22:03 +00:00
|
|
|
sub %ecx, %edx
|
|
|
|
sub %ecx, %eax
|
2012-03-30 20:45:27 +00:00
|
|
|
/* Restore the EDI saved at L(copy_backward).  */
POP (%edi)
|
2010-01-12 19:22:03 +00:00
|
|
|
L(bk_write_less32bytes_2):
|
|
|
|
/* Dispatch on the remaining count in ECX through the backward jump
   table (macro defined outside this chunk; scale argument is 4).  */
BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
/* The code below is entered with EDI still pushed, whereas the path
   just above has already popped it.  CFI_PUSH is a macro defined
   outside this chunk; presumably it re-declares the pushed-EDI
   unwind state for what follows -- confirm.  */
CFI_PUSH (%edi)
|
|
|
|
|
|
|
|
/* Bring the destination end pointer (EDX) to 4-byte alignment by
   copying 1 and/or 2 trailing bytes, then continue at
   L(bk_aligned_4).  EDI is the source end pointer, ECX the remaining
   byte count and EAX a scratch register.  */
.p2align 4
|
2010-01-12 19:22:03 +00:00
|
|
|
L(bk_align):
|
|
|
|
cmp $8, %ecx
|
2010-02-25 02:20:57 +00:00
|
|
|
/* With 8 bytes or fewer, skip the alignment and go straight to the
   tail dispatch.  */
jbe L(bk_write_less32bytes)
|
2010-01-12 19:22:03 +00:00
|
|
|
/* The flags set here are consumed by the jz after the comment.  */
testl $1, %edx
|
|
|
|
/* We get here only if (EDX & 3 ) != 0 so if (EDX & 1) ==0,
|
2012-03-30 20:45:27 +00:00
|
|
|
then (EDX & 2) must be != 0. */
|
2010-01-12 19:22:03 +00:00
|
|
|
jz L(bk_got2)
|
2012-03-30 20:45:27 +00:00
|
|
|
/* Odd destination end: copy one byte to make it even.  */
sub $1, %edi
|
2010-01-12 19:22:03 +00:00
|
|
|
sub $1, %ecx
|
|
|
|
sub $1, %edx
|
2012-03-30 20:45:27 +00:00
|
|
|
|
|
movzbl (%edi), %eax
|
2010-01-12 19:22:03 +00:00
|
|
|
movb %al, (%edx)
|
|
|
|
|
|
|
|
|
|
/* If bit 1 is now clear the end is 4-byte aligned; otherwise fall
   through and copy two more bytes.  */
testl $2, %edx
|
|
|
|
jz L(bk_aligned_4)
|
|
|
|
|
|
|
|
|
|
/* Destination end is even but not 4-byte aligned: copy two bytes.  */
L(bk_got2):
|
2012-03-30 20:45:27 +00:00
|
|
|
sub $2, %edi
|
2010-01-12 19:22:03 +00:00
|
|
|
sub $2, %ecx
|
|
|
|
sub $2, %edx
|
2012-03-30 20:45:27 +00:00
|
|
|
movzwl (%edi), %eax
|
2010-01-12 19:22:03 +00:00
|
|
|
movw %ax, (%edx)
|
|
|
|
jmp L(bk_aligned_4)
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
/* Backward copy of 64 or more bytes.  EDX is the destination end
   pointer (4-byte aligned on arrival from L(bk_aligned_4)), EDI the
   source end pointer and ECX the remaining byte count.  EDX is first
   brought to 16-byte alignment with up to three 4-byte moves, then
   64-byte blocks are copied with aligned stores.  */
.p2align 4
|
2010-01-12 19:22:03 +00:00
|
|
|
L(bk_write_more64bytes):
|
|
|
|
/* Check alignment of last byte. */
|
|
|
|
testl $15, %edx
|
|
|
|
jz L(bk_ssse3_cpy_pre)
|
|
|
|
|
|
|
|
|
|
/* EDX is aligned 4 bytes, but not 16 bytes. */
|
|
|
|
L(bk_ssse3_align):
|
2012-03-30 20:45:27 +00:00
|
|
|
/* First 4-byte alignment step.  */
sub $4, %edi
|
2010-01-12 19:22:03 +00:00
|
|
|
sub $4, %ecx
|
|
|
|
sub $4, %edx
|
2012-03-30 20:45:27 +00:00
|
|
|
|
|
movl (%edi), %eax
|
2010-01-12 19:22:03 +00:00
|
|
|
movl %eax, (%edx)
|
|
|
|
|
|
|
|
|
|
testl $15, %edx
|
|
|
|
jz L(bk_ssse3_cpy_pre)
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
/* Second 4-byte alignment step.  */
sub $4, %edi
|
2010-01-12 19:22:03 +00:00
|
|
|
sub $4, %ecx
|
|
|
|
sub $4, %edx
|
2012-03-30 20:45:27 +00:00
|
|
|
|
|
movl (%edi), %eax
|
2010-01-12 19:22:03 +00:00
|
|
|
movl %eax, (%edx)
|
|
|
|
|
|
|
|
|
|
testl $15, %edx
|
|
|
|
jz L(bk_ssse3_cpy_pre)
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
/* Third 4-byte alignment step.  No test follows: a 4-byte-aligned
   address that was not 16-byte aligned after two steps must be
   16-byte aligned after the third.  */
sub $4, %edi
|
2010-01-12 19:22:03 +00:00
|
|
|
sub $4, %ecx
|
|
|
|
sub $4, %edx
|
2012-03-30 20:45:27 +00:00
|
|
|
|
|
movl (%edi), %eax
|
2010-01-12 19:22:03 +00:00
|
|
|
movl %eax, (%edx)
|
|
|
|
|
|
|
|
|
|
/* EDX is 16-byte aligned here.  The alignment steps may have taken
   ECX below 64, so re-check before entering the 64-byte loop.  */
L(bk_ssse3_cpy_pre):
|
|
|
|
cmp $64, %ecx
|
2010-02-25 02:20:57 +00:00
|
|
|
|
|
jb L(bk_write_more32bytes)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
/* Align the loop head to 16 bytes.  */
.p2align 4
|
2010-01-12 19:22:03 +00:00
|
|
|
/* Main loop: each pass moves both pointers down by 64 and copies
   that block as four 16-byte pieces, highest first.  Loads use
   movdqu (source alignment unknown); stores use movdqa, which
   relies on EDX being 16-byte aligned.  */
L(bk_ssse3_cpy):
|
2012-03-30 20:45:27 +00:00
|
|
|
sub $64, %edi
|
2010-01-12 19:22:03 +00:00
|
|
|
sub $64, %ecx
|
|
|
|
sub $64, %edx
|
2012-03-30 20:45:27 +00:00
|
|
|
movdqu 0x30(%edi), %xmm3
|
2010-01-12 19:22:03 +00:00
|
|
|
movdqa %xmm3, 0x30(%edx)
|
2012-03-30 20:45:27 +00:00
|
|
|
movdqu 0x20(%edi), %xmm2
|
2010-01-12 19:22:03 +00:00
|
|
|
movdqa %xmm2, 0x20(%edx)
|
2012-03-30 20:45:27 +00:00
|
|
|
movdqu 0x10(%edi), %xmm1
|
2010-01-12 19:22:03 +00:00
|
|
|
movdqa %xmm1, 0x10(%edx)
|
2012-03-30 20:45:27 +00:00
|
|
|
movdqu (%edi), %xmm0
|
2010-01-12 19:22:03 +00:00
|
|
|
movdqa %xmm0, (%edx)
|
|
|
|
/* Loop while at least 64 bytes remain (unsigned compare).  */
cmp $64, %ecx
|
2010-02-25 02:20:57 +00:00
|
|
|
jae L(bk_ssse3_cpy)
|
2010-01-12 19:22:03 +00:00
|
|
|
|
|
/* Fewer than 64 bytes left: finish with the small-size path.  */
jmp L(bk_write_64bytesless)
|
|
|
|
|
2012-03-30 20:45:27 +00:00
|
|
|
# endif
|
2010-01-12 19:22:03 +00:00
|
|
|
|
|
|
|
END (MEMCPY)
|
|
|
|
|
|
|
|
#endif
|