AArch64: Check kernel version for SVE ifuncs

Old Linux kernels disable SVE after every system call.  Calling the
SVE-optimized memcpy afterwards will then cause a trap to reenable SVE.
As a result, applications with a high use of syscalls may run slower with
the SVE memcpy.  This is true for kernels from 4.15.0 up to, but not including,
6.2.0, except for 5.14.0 which was patched.  Avoid this by checking the kernel
version and selecting the SVE ifunc on modern kernels.

Parse the kernel version reported by uname() into a 24-bit kernel.major.minor
value without calling any library functions.  If uname() is not supported or
if the version format is not recognized, assume the kernel is modern.

Tested-by: Florian Weimer <fweimer@redhat.com>
Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
(cherry picked from commit 2e94e2f5d2)
This commit is contained in:
Wilco Dijkstra 2024-03-21 16:48:33 +00:00
parent 07aa48b111
commit a4c897e4c7
5 changed files with 54 additions and 3 deletions

View File

@ -36,5 +36,7 @@
MTE_ENABLED (); \ MTE_ENABLED (); \
bool __attribute__((unused)) sve = \ bool __attribute__((unused)) sve = \
GLRO(dl_aarch64_cpu_features).sve; \ GLRO(dl_aarch64_cpu_features).sve; \
bool __attribute__((unused)) prefer_sve_ifuncs = \
GLRO(dl_aarch64_cpu_features).prefer_sve_ifuncs; \
bool __attribute__((unused)) mops = \ bool __attribute__((unused)) mops = \
GLRO(dl_aarch64_cpu_features).mops; GLRO(dl_aarch64_cpu_features).mops;

View File

@ -47,7 +47,7 @@ select_memcpy_ifunc (void)
{ {
if (IS_A64FX (midr)) if (IS_A64FX (midr))
return __memcpy_a64fx; return __memcpy_a64fx;
return __memcpy_sve; return prefer_sve_ifuncs ? __memcpy_sve : __memcpy_generic;
} }
if (IS_THUNDERX (midr)) if (IS_THUNDERX (midr))

View File

@ -47,7 +47,7 @@ select_memmove_ifunc (void)
{ {
if (IS_A64FX (midr)) if (IS_A64FX (midr))
return __memmove_a64fx; return __memmove_a64fx;
return __memmove_sve; return prefer_sve_ifuncs ? __memmove_sve : __memmove_generic;
} }
if (IS_THUNDERX (midr)) if (IS_THUNDERX (midr))

View File

@ -20,6 +20,7 @@
#include <sys/auxv.h> #include <sys/auxv.h>
#include <elf/dl-hwcaps.h> #include <elf/dl-hwcaps.h>
#include <sys/prctl.h> #include <sys/prctl.h>
#include <sys/utsname.h>
#define DCZID_DZP_MASK (1 << 4) #define DCZID_DZP_MASK (1 << 4)
#define DCZID_BS_MASK (0xf) #define DCZID_BS_MASK (0xf)
@ -59,6 +60,46 @@ get_midr_from_mcpu (const char *mcpu)
} }
#endif #endif
#if __LINUX_KERNEL_VERSION < 0x060200
/* Return true if we prefer using SVE in string ifuncs.  Old kernels disable
   SVE after every system call which results in unnecessary traps if memcpy
   uses SVE.  This is true for kernels between 4.15.0 and before 6.2.0, except
   for 5.14.0 which was patched.  For these versions return false to avoid
   using SVE ifuncs.

   Parse the kernel release string reported by uname() into a 24-bit
   kernel.major.minor value without calling any library functions.  Each
   component is saturated at 255 so that an oversized component (e.g. a
   patch level above 255) can neither overflow the accumulator nor alias a
   smaller version number.  If uname() is not supported or if the version
   format is not recognized, assume the kernel is modern and return true.  */
static inline bool
prefer_sve_ifuncs (void)
{
  struct utsname buf;
  const char *p = &buf.release[0];
  int kernel = 0;

  if (__uname (&buf) < 0)
    return true;

  /* Pack up to three dot-separated decimal components, most significant
     first, into bits 23-16, 15-8 and 7-0 of KERNEL.  */
  for (int shift = 16; shift >= 0; shift -= 8)
    {
      int val = 0;

      for (; *p >= '0' && *p <= '9'; p++)
        {
          val = val * 10 + (*p - '0');
          /* Saturate instead of wrapping: this keeps VAL small (no signed
             overflow on long digit runs) and stops e.g. x.y.256 from being
             encoded as x.y.0.  */
          if (val > 255)
            val = 255;
        }
      kernel |= val << shift;

      /* Anything other than a '.' (a suffix such as "-rc1", or the end of
         the string) terminates the numeric part of the version.  */
      if (*p++ != '.')
        break;
    }

  /* 6.2.0 and later, plus the special-cased 5.14.0, are fine.  */
  if (kernel >= 0x060200 || kernel == 0x050e00)
    return true;
  /* 4.15.0 up to (but excluding) 6.2.0: avoid the SVE ifuncs.  */
  if (kernel >= 0x040f00)
    return false;
  /* Anything below 4.15.0, including a release string that does not start
     with a digit (which parses as 0).  */
  return true;
}
#endif
static inline void static inline void
init_cpu_features (struct cpu_features *cpu_features) init_cpu_features (struct cpu_features *cpu_features)
{ {
@ -119,6 +160,13 @@ init_cpu_features (struct cpu_features *cpu_features)
/* Check if SVE is supported. */ /* Check if SVE is supported. */
cpu_features->sve = GLRO (dl_hwcap) & HWCAP_SVE; cpu_features->sve = GLRO (dl_hwcap) & HWCAP_SVE;
cpu_features->prefer_sve_ifuncs = cpu_features->sve;
#if __LINUX_KERNEL_VERSION < 0x060200
if (cpu_features->sve)
cpu_features->prefer_sve_ifuncs = prefer_sve_ifuncs ();
#endif
/* Check if MOPS is supported. */ /* Check if MOPS is supported. */
cpu_features->mops = GLRO (dl_hwcap2) & HWCAP2_MOPS; cpu_features->mops = GLRO (dl_hwcap2) & HWCAP2_MOPS;
} }

View File

@ -1,6 +1,6 @@
/* Initialize CPU feature data. AArch64 version. /* Initialize CPU feature data. AArch64 version.
This file is part of the GNU C Library. This file is part of the GNU C Library.
Copyright (C) 2017-2021 Free Software Foundation, Inc. Copyright (C) 2017-2024 Free Software Foundation, Inc.
The GNU C Library is free software; you can redistribute it and/or The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public modify it under the terms of the GNU Lesser General Public
@ -71,6 +71,7 @@ struct cpu_features
/* Currently, the GLIBC memory tagging tunable only defines 8 bits. */ /* Currently, the GLIBC memory tagging tunable only defines 8 bits. */
uint8_t mte_state; uint8_t mte_state;
bool sve; bool sve;
bool prefer_sve_ifuncs;
bool mops; bool mops;
}; };