mirror of
https://sourceware.org/git/glibc.git
synced 2024-12-13 06:40:09 +00:00
c9cd7b0ce5
On POWER8, unaligned memory accesses to cached memory have little impact on performance, unlike on its predecessors. The memcpy variant that takes advantage of this, __memcpy_power8_cached, is disabled by default and is only available when the tunable glibc.tune.cached_memopt is set to 1. __memcpy_power8_cached __memcpy_power7 ============================================================ max-size=4096: 33325.70 ( 12.65%) 38153.00 max-size=8192: 32878.20 ( 11.17%) 37012.30 max-size=16384: 33782.20 ( 11.61%) 38219.20 max-size=32768: 33296.20 ( 11.30%) 37538.30 max-size=65536: 33765.60 ( 10.53%) 37738.40 * manual/tunables.texi (Hardware Capability Tunables): Document glibc.tune.cached_memopt. * sysdeps/powerpc/cpu-features.c: New file. * sysdeps/powerpc/cpu-features.h: New file. * sysdeps/powerpc/dl-procinfo.c [!IS_IN(ldconfig)]: Add _dl_powerpc_cpu_features. * sysdeps/powerpc/dl-tunables.list: New file. * sysdeps/powerpc/ldsodefs.h: Include cpu-features.h. * sysdeps/powerpc/powerpc32/power4/multiarch/init-arch.h (INIT_ARCH): Initialize use_cached_memopt. * sysdeps/powerpc/powerpc64/dl-machine.h [defined(SHARED && IS_IN(rtld))]: Restrict dl_platform_init availability and initialize CPU features used by tunables. * sysdeps/powerpc/powerpc64/multiarch/Makefile (sysdep_routines): Add memcpy-power8-cached. * sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c: Add __memcpy_power8_cached. * sysdeps/powerpc/powerpc64/multiarch/memcpy.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/memcpy-power8-cached.S: New file. Reviewed-by: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
56 lines
2.3 KiB
C
56 lines
2.3 KiB
C
/* This file is part of the GNU C Library.
|
|
Copyright (C) 2013-2017 Free Software Foundation, Inc.
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with the GNU C Library; if not, see
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
#include <ldsodefs.h>
|
|
|
|
/* Guarded read of a _rtld_global_ro field (for example dl_hwcap).

   In a shared build outside the dynamic loader this code may run before
   _rtld_global_ro has been relocated, so the address of the object is
   tested before any field is read; when the address is null the macro
   evaluates to 0.  ISO C lets the compiler assume that the address of an
   object is never NULL and fold the test away, so the pointer is passed
   through an asm statement (which emits only an assembler comment) to keep
   the compiler from optimizing the test out.

   Everywhere else __GLRO is a plain alias for GLRO.  */
#if defined(SHARED) && !IS_IN (rtld)
# define __GLRO(value)                                                   \
  ({ volatile void **__ro_addr = (volatile void**)(&_rtld_global_ro);    \
     unsigned long int __field;                                          \
     asm ("# x in %0" : "+r" (__ro_addr));                               \
     if (__ro_addr)                                                      \
       __field = GLRO(value);                                            \
     else                                                                \
       __field = 0;                                                      \
     __field; })
#else
# define __GLRO(value)  GLRO(value)
#endif
|
|
|
|
/* dl_hwcap advertises only the newest ISA level the CPU supports.  INIT_ARCH
   declares `hwcap', `hwcap2' and `use_cached_memopt' in the enclosing scope,
   then ORs into `hwcap' the bits of every older level implied by the newest
   one found, so that callers can test for any single level directly.

   All three values are fetched through __GLRO, not GLRO: __GLRO checks
   _rtld_global_ro before reading a field, and the use_cached_memopt read
   needs that check just as much as the two hwcap reads do.  */
#define INIT_ARCH() \
  unsigned long int hwcap = __GLRO(dl_hwcap);                            \
  unsigned long int __attribute__((unused)) hwcap2 = __GLRO(dl_hwcap2);  \
  bool __attribute__((unused)) use_cached_memopt =                       \
    __GLRO(dl_powerpc_cpu_features.use_cached_memopt);                   \
  if (hwcap & PPC_FEATURE_ARCH_2_06)                                     \
    hwcap |= PPC_FEATURE_ARCH_2_05 |                                     \
             PPC_FEATURE_POWER5_PLUS |                                   \
             PPC_FEATURE_POWER5 |                                        \
             PPC_FEATURE_POWER4;                                         \
  else if (hwcap & PPC_FEATURE_ARCH_2_05)                                \
    hwcap |= PPC_FEATURE_POWER5_PLUS |                                   \
             PPC_FEATURE_POWER5 |                                        \
             PPC_FEATURE_POWER4;                                         \
  else if (hwcap & PPC_FEATURE_POWER5_PLUS)                              \
    hwcap |= PPC_FEATURE_POWER5 |                                        \
             PPC_FEATURE_POWER4;                                         \
  else if (hwcap & PPC_FEATURE_POWER5)                                   \
    hwcap |= PPC_FEATURE_POWER4;
|