glibc/sysdeps/x86_64/memmove.S
Noah Goldstein a7392db2ff x86: Optimize memmove-vec-unaligned-erms.S
No bug.

The optimizations are as follows:

1) Always align entry to 64 bytes. This makes behavior more
   predictable and makes other frontend optimizations easier.

2) Make the L(more_8x_vec) cases 4k aliasing aware. This can have
   significant benefits in the case that:
        0 < (dst - src) < [256, 512]

3) Align before `rep movsb`. For ERMS this is roughly a [0, 30%]
   improvement and for FSRM [-10%, 25%].

In addition to these primary changes there is general cleanup
throughout to optimize the aligning routines and control flow logic.

Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
(cherry picked from commit a6b7502ec0)
2022-04-26 18:18:16 -07:00

72 lines
2.0 KiB
ArmAsm

/* Optimized memmove for x86-64.
Copyright (C) 2016-2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
/* Generic (baseline SSE2) build of memmove for x86-64: the macros below
   parameterize the shared memmove-vec-unaligned-erms.S template with
   16-byte xmm vectors before including it.  */
#define VEC_SIZE 16
#define VEC(i) xmm##i
#define PREFETCHNT prefetchnta
/* movntdq: non-temporal (cache-bypassing) store, used by the template
   for the largest copy sizes.  */
#define VMOVNT movntdq
/* Use movups and movaps for smaller code sizes. */
#define VMOVU movups
#define VMOVA movaps
/* NOTE(review): presumably the encoded byte length of the VMOVU/VMOVA
   instructions, consumed by the included template (likely for entry
   padding/alignment) -- confirm in memmove-vec-unaligned-erms.S.  */
#define MOV_SIZE 3
/* Emit code into the named section unchanged (no .s/.e suffixing).  */
#define SECTION(p) p
/* Select the public symbol names this build provides.  In the
   non-multiarch shared libc the code is emitted as __memcpy and the
   exported memcpy alias is created below via strong_alias.  */
#ifdef USE_MULTIARCH
# if !IS_IN (libc)
# define MEMCPY_SYMBOL(p,s) memcpy
# endif
#else
# if defined SHARED && IS_IN (libc)
# define MEMCPY_SYMBOL(p,s) __memcpy
# else
# define MEMCPY_SYMBOL(p,s) memcpy
# endif
#endif
#if !defined USE_MULTIARCH || !IS_IN (libc)
# define MEMPCPY_SYMBOL(p,s) __mempcpy
#endif
#ifndef MEMMOVE_SYMBOL
# define MEMMOVE_CHK_SYMBOL(p,s) p
# define MEMMOVE_SYMBOL(p,s) memmove
#endif
/* The shared template emits the actual memmove/memcpy/mempcpy code
   under the symbol names chosen by the macros above.  */
#include "multiarch/memmove-vec-unaligned-erms.S"
#ifndef USE_MULTIARCH
libc_hidden_builtin_def (memmove)
# if defined SHARED && IS_IN (libc)
/* In the shared non-multiarch libc, memcpy is the same code as
   memmove; publish it under both names.  */
strong_alias (memmove, __memcpy)
libc_hidden_ver (memmove, memcpy)
# endif
libc_hidden_def (__mempcpy)
weak_alias (__mempcpy, mempcpy)
libc_hidden_builtin_def (mempcpy)
# if defined SHARED && IS_IN (libc)
# undef memcpy
# include <shlib-compat.h>
/* memcpy got its own symbol version at GLIBC_2.14; the default export
   is __memcpy under that version.  */
versioned_symbol (libc, __memcpy, memcpy, GLIBC_2_14);
# if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14)
/* Binaries linked against glibc older than 2.14 resolve memcpy to
   memmove via this compat symbol.  */
compat_symbol (libc, memmove, memcpy, GLIBC_2_2_5);
# endif
# endif
#endif