x86-64: Add glibc.cpu.prefer_map_32bit_exec [BZ #28656]

Crossing 2GB boundaries with indirect calls and jumps can use more
branch prediction resources on Intel Golden Cove CPUs (see the
"Misprediction for Branches >2GB" section in the Intel 64 and IA-32
Architectures Optimization Reference Manual).  There is a visible
performance improvement on workloads with many PLT calls when the
executable and shared libraries are mmapped below 2GB.  Add the
Prefer_MAP_32BIT_EXEC bit so that mmap will try to map executable or
denywrite pages in shared libraries with MAP_32BIT first.

NB: Prefer_MAP_32BIT_EXEC reduces the bits available for address space
layout randomization (ASLR).  For that reason the preference is always
disabled for SUID programs and can only be enabled by the tunable,
glibc.cpu.prefer_map_32bit_exec, or the environment variable,
LD_PREFER_MAP_32BIT_EXEC.  This works only between shared libraries or
between shared libraries and executables with addresses below 2GB.  PIEs
are usually loaded at a random address above 4GB by the kernel.
This commit is contained in:
H.J. Lu 2023-01-26 08:26:18 -08:00
parent bde1218720
commit 317f1c0a8a
9 changed files with 205 additions and 9 deletions

View File

@ -35,27 +35,32 @@ tunables with minimum and maximum values:
@example @example
$ /lib64/ld-linux-x86-64.so.2 --list-tunables $ /lib64/ld-linux-x86-64.so.2 --list-tunables
glibc.rtld.nns: 0x4 (min: 0x1, max: 0x10) glibc.rtld.nns: 0x4 (min: 0x1, max: 0x10)
glibc.elision.skip_lock_after_retries: 3 (min: -2147483648, max: 2147483647) glibc.elision.skip_lock_after_retries: 3 (min: 0, max: 2147483647)
glibc.malloc.trim_threshold: 0x0 (min: 0x0, max: 0xffffffffffffffff) glibc.malloc.trim_threshold: 0x0 (min: 0x0, max: 0xffffffffffffffff)
glibc.malloc.perturb: 0 (min: 0, max: 255) glibc.malloc.perturb: 0 (min: 0, max: 255)
glibc.cpu.x86_shared_cache_size: 0x100000 (min: 0x0, max: 0xffffffffffffffff) glibc.cpu.x86_shared_cache_size: 0x100000 (min: 0x0, max: 0xffffffffffffffff)
glibc.pthread.rseq: 1 (min: 0, max: 1)
glibc.cpu.prefer_map_32bit_exec: 0 (min: 0, max: 1)
glibc.mem.tagging: 0 (min: 0, max: 255) glibc.mem.tagging: 0 (min: 0, max: 255)
glibc.elision.tries: 3 (min: -2147483648, max: 2147483647) glibc.elision.tries: 3 (min: 0, max: 2147483647)
glibc.elision.enable: 0 (min: 0, max: 1) glibc.elision.enable: 0 (min: 0, max: 1)
glibc.cpu.x86_rep_movsb_threshold: 0x1000 (min: 0x100, max: 0xffffffffffffffff) glibc.malloc.hugetlb: 0x0 (min: 0x0, max: 0xffffffffffffffff)
glibc.cpu.x86_rep_movsb_threshold: 0x2000 (min: 0x100, max: 0xffffffffffffffff)
glibc.malloc.mxfast: 0x0 (min: 0x0, max: 0xffffffffffffffff) glibc.malloc.mxfast: 0x0 (min: 0x0, max: 0xffffffffffffffff)
glibc.elision.skip_lock_busy: 3 (min: -2147483648, max: 2147483647) glibc.rtld.dynamic_sort: 2 (min: 1, max: 2)
glibc.malloc.top_pad: 0x0 (min: 0x0, max: 0xffffffffffffffff) glibc.elision.skip_lock_busy: 3 (min: 0, max: 2147483647)
glibc.malloc.top_pad: 0x20000 (min: 0x0, max: 0xffffffffffffffff)
glibc.cpu.x86_rep_stosb_threshold: 0x800 (min: 0x1, max: 0xffffffffffffffff) glibc.cpu.x86_rep_stosb_threshold: 0x800 (min: 0x1, max: 0xffffffffffffffff)
glibc.cpu.x86_non_temporal_threshold: 0xc0000 (min: 0x4040, max: 0x0fffffffffffffff) glibc.cpu.x86_non_temporal_threshold: 0xc0000 (min: 0x4040, max: 0xfffffffffffffff)
glibc.cpu.x86_shstk: glibc.cpu.x86_shstk:
glibc.pthread.stack_cache_size: 0x2800000 (min: 0x0, max: 0xffffffffffffffff)
glibc.cpu.hwcap_mask: 0x6 (min: 0x0, max: 0xffffffffffffffff) glibc.cpu.hwcap_mask: 0x6 (min: 0x0, max: 0xffffffffffffffff)
glibc.malloc.mmap_max: 0 (min: -2147483648, max: 2147483647) glibc.malloc.mmap_max: 0 (min: 0, max: 2147483647)
glibc.elision.skip_trylock_internal_abort: 3 (min: -2147483648, max: 2147483647) glibc.elision.skip_trylock_internal_abort: 3 (min: 0, max: 2147483647)
glibc.malloc.tcache_unsorted_limit: 0x0 (min: 0x0, max: 0xffffffffffffffff) glibc.malloc.tcache_unsorted_limit: 0x0 (min: 0x0, max: 0xffffffffffffffff)
glibc.cpu.x86_ibt: glibc.cpu.x86_ibt:
glibc.cpu.hwcaps: glibc.cpu.hwcaps:
glibc.elision.skip_lock_internal_abort: 3 (min: -2147483648, max: 2147483647) glibc.elision.skip_lock_internal_abort: 3 (min: 0, max: 2147483647)
glibc.malloc.arena_max: 0x0 (min: 0x1, max: 0xffffffffffffffff) glibc.malloc.arena_max: 0x0 (min: 0x1, max: 0xffffffffffffffff)
glibc.malloc.mmap_threshold: 0x0 (min: 0x0, max: 0xffffffffffffffff) glibc.malloc.mmap_threshold: 0x0 (min: 0x0, max: 0xffffffffffffffff)
glibc.cpu.x86_data_cache_size: 0x8000 (min: 0x0, max: 0xffffffffffffffff) glibc.cpu.x86_data_cache_size: 0x8000 (min: 0x0, max: 0xffffffffffffffff)
@ -583,6 +588,16 @@ instead.
This tunable is specific to i386 and x86-64. This tunable is specific to i386 and x86-64.
@end deftp @end deftp
@deftp Tunable glibc.cpu.prefer_map_32bit_exec
When this tunable is set to @code{1}, shared libraries of non-setuid
programs will be loaded below 2GB with @code{MAP_32BIT}.
Note that the @env{LD_PREFER_MAP_32BIT_EXEC} environment variable is an
alias of this tunable.
This tunable is specific to 64-bit x86-64.
@end deftp
@node Memory Related Tunables @node Memory Related Tunables
@section Memory Related Tunables @section Memory Related Tunables
@cindex memory related tunables @cindex memory related tunables

View File

@ -1,2 +1,27 @@
# The default ABI is 64. # The default ABI is 64.
default-abi := 64 default-abi := 64
# Tests for glibc.cpu.prefer_map_32bit_exec and its environment alias
# LD_PREFER_MAP_32BIT_EXEC.  They are only added in the elf subdirectory
# and only when have-tunables is not "no".
ifeq ($(subdir),elf)
ifneq ($(have-tunables),no)

# Shared object used by both tests; it depends on libsupport.
modules-map-32bit = \
  tst-map-32bit-mod \
# modules-map-32bit
modules-names += $(modules-map-32bit)

$(objpfx)tst-map-32bit-mod.so: $(libsupport)

# The test programs themselves.
tests-map-32bit = \
  tst-map-32bit-1a \
  tst-map-32bit-1b \
# tests-map-32bit
tests += $(tests-map-32bit)

# Both tests are flagged no-pie.
tst-map-32bit-1a-no-pie = yes
tst-map-32bit-1b-no-pie = yes

# 1a turns the preference on through the environment variable alias,
# 1b through GLIBC_TUNABLES.
tst-map-32bit-1a-ENV = LD_PREFER_MAP_32BIT_EXEC=1
tst-map-32bit-1b-ENV = GLIBC_TUNABLES=glibc.cpu.prefer_map_32bit_exec=1

# Each test needs the module to be built first.
$(objpfx)tst-map-32bit-1a $(objpfx)tst-map-32bit-1b: \
  $(objpfx)tst-map-32bit-mod.so

endif # have-tunables
endif # subdir == elf

View File

@ -0,0 +1,29 @@
# x86-64 specific tunables.
# Copyright (C) 2023 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
# The GNU C Library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
# The GNU C Library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public
# License along with the GNU C Library; if not, see
# <https://www.gnu.org/licenses/>.
# Declares the tunable glibc.cpu.prefer_map_32bit_exec: a 32-bit integer
# restricted to the values 0 and 1.  The LD_PREFER_MAP_32BIT_EXEC
# environment variable is declared as an alias for it.
# NOTE(review): security_level SXID_IGNORE presumably makes the tunable
# ignored for setuid/setgid programs -- confirm against the tunables
# framework documentation.
glibc {
cpu {
prefer_map_32bit_exec {
type: INT_32
minval: 0
maxval: 1
env_alias: LD_PREFER_MAP_32BIT_EXEC
security_level: SXID_IGNORE
}
}
}

View File

@ -0,0 +1,43 @@
/* Linux mmap system call. x86-64 version.
Copyright (C) 2015-2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation; either version 2.1 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#ifndef MMAP_X86_64_INTERNAL_H
#define MMAP_X86_64_INTERNAL_H

#include <ldsodefs.h>

/* Expands to a statement placed at the start of an mmap implementation.
   When the caller gave no address hint, the mapping is executable or
   MAP_DENYWRITE, and the Prefer_MAP_32BIT_EXEC arch feature is on, the
   mmap system call is first attempted with MAP_32BIT added to FLAGS.
   If that attempt succeeds the enclosing function returns the mapping
   right away; if it yields MAP_FAILED, execution simply continues after
   the macro.  */
#define MMAP_PREPARE(addr, len, prot, flags, fd, offset)                \
  if ((addr) == NULL                                                    \
      && (((prot) & PROT_EXEC) || ((flags) & MAP_DENYWRITE))            \
      && HAS_ARCH_FEATURE (Prefer_MAP_32BIT_EXEC))                      \
    {                                                                   \
      void *map32_result                                                \
        = (void *) INLINE_SYSCALL_CALL (mmap, (addr), (len), (prot),    \
                                        (flags) | MAP_32BIT,            \
                                        (fd), (offset));                \
      if (map32_result != MAP_FAILED)                                   \
        return map32_result;                                            \
    }

#include_next <mmap_internal.h>

#endif

View File

@ -0,0 +1,34 @@
/* Check that LD_PREFER_MAP_32BIT_EXEC works in PDE and shared library.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <stdio.h>
#include <stdint.h>
#include <support/check.h>
extern void dso_check_map_32bit (void);
/* Print the address of do_test, verify that it is below 0xffffffff, and
   then run the corresponding check inside the test module through
   dso_check_map_32bit.  Returns 0 unconditionally.  */
static int
do_test (void)
{
  /* The %p conversion requires a void * argument, so convert the
     function pointer explicitly instead of passing it as is.  */
  printf ("do_test: %p\n", (void *) do_test);
  TEST_VERIFY ((uintptr_t) do_test < 0xffffffffUL);
  dso_check_map_32bit ();
  return 0;
}
#include <support/test-driver.c>

View File

@ -0,0 +1 @@
#include "tst-map-32bit-1a.c"

View File

@ -0,0 +1,33 @@
/* Check that LD_PREFER_MAP_32BIT_EXEC works in shared library.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <stdio.h>
#include <stdint.h>
#include <support/check.h>
/* Empty function; only its address is used, as a marker for where this
   file's code has been mapped.  */
static void
dso_do_test (void)
{
}

/* Print the address of dso_do_test and verify that it is below
   0xffffffff.  */
void
dso_check_map_32bit (void)
{
  /* The %p conversion requires a void * argument, so convert the
     function pointer explicitly instead of passing it as is.  */
  printf ("dso_do_test: %p\n", (void *) dso_do_test);
  TEST_VERIFY ((uintptr_t) dso_do_test < 0xffffffffUL);
}

View File

@ -27,6 +27,16 @@
extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *) extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
attribute_hidden; attribute_hidden;
# ifdef __LP64__
/* Tunable callback for prefer_map_32bit_exec.  A non-zero value sets the
   Prefer_MAP_32BIT_EXEC bit in the preferred array of the x86 CPU
   features; a zero value leaves the array untouched.  */
static void
TUNABLE_CALLBACK (set_prefer_map_32bit_exec) (tunable_val_t *valp)
{
  /* Nothing to do when the tunable is off.  */
  if (valp->numval == 0)
    return;

  GLRO(dl_x86_cpu_features).preferred[index_arch_Prefer_MAP_32BIT_EXEC]
    |= bit_arch_Prefer_MAP_32BIT_EXEC;
}
# endif
# if CET_ENABLED # if CET_ENABLED
extern void TUNABLE_CALLBACK (set_x86_ibt) (tunable_val_t *) extern void TUNABLE_CALLBACK (set_x86_ibt) (tunable_val_t *)
attribute_hidden; attribute_hidden;
@ -705,6 +715,11 @@ no_cpuid:
#if HAVE_TUNABLES #if HAVE_TUNABLES
TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps)); TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
# ifdef __LP64__
TUNABLE_GET (prefer_map_32bit_exec, tunable_val_t *,
TUNABLE_CALLBACK (set_prefer_map_32bit_exec));
# endif
bool disable_xsave_features = false; bool disable_xsave_features = false;
if (!CPU_FEATURE_USABLE_P (cpu_features, OSXSAVE)) if (!CPU_FEATURE_USABLE_P (cpu_features, OSXSAVE))

View File

@ -26,6 +26,7 @@ BIT (I586)
BIT (I686) BIT (I686)
BIT (Slow_SSE4_2) BIT (Slow_SSE4_2)
BIT (AVX_Fast_Unaligned_Load) BIT (AVX_Fast_Unaligned_Load)
BIT (Prefer_MAP_32BIT_EXEC)
BIT (Prefer_No_VZEROUPPER) BIT (Prefer_No_VZEROUPPER)
BIT (Prefer_ERMS) BIT (Prefer_ERMS)
BIT (Prefer_No_AVX512) BIT (Prefer_No_AVX512)