mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-08 14:20:07 +00:00
nptl: Invert the mmap/mprotect logic on allocated stacks (BZ#18988)
The current allocate_stack logic for creating stacks is to first mmap all the required memory with the desired memory protection and then mprotect the guard area with PROT_NONE if required. Although it works as expected, it pessimizes the allocation because it requires the kernel to actually increase the commit charge (it counts against the physical/swap memory available for the system). The only issue is how to actually check this change, since the side-effects are really Linux specific and accounting for them would require kernel-specific tests that parse the system-wide information. On the kernel I checked, /proc/self/statm does not show any meaningful difference for vmm and/or rss before and after thread creation. I could only see really meaningful information by checking the system-wide /proc/meminfo between thread creations: MemFree, MemAvailable, and Committed_AS show a large difference without the patch. I think trying to use this kind of information in a testcase is fragile. The BZ#18988 report shows that the committed pages are easily seen with mlockall (MCL_FUTURE) (which locks all pages that become mapped in the process); however, a more straightforward testcase shows that pthread_create could be faster using this patch: -- static const int inner_count = 256; static const int outer_count = 128; static void *thread1(void *arg) { return NULL; } static void *sleeper(void *arg) { pthread_t ts[inner_count]; for (int i = 0; i < inner_count; i++) pthread_create (&ts[i], &a, thread1, NULL); for (int i = 0; i < inner_count; i++) pthread_join (ts[i], NULL); return NULL; } int main(void) { pthread_attr_init(&a); pthread_attr_setguardsize(&a, 1<<20); pthread_attr_setstacksize(&a, 1134592); pthread_t ts[outer_count]; for (int i = 0; i < outer_count; i++) pthread_create(&ts[i], &a, sleeper, NULL); for (int i = 0; i < outer_count; i++) pthread_join(ts[i], NULL); assert(r == 0); } return 0; } -- On x86_64 (4.4.0-45-generic, gcc 5.4.0) running the small benchtests I see: $ time ./test real
0m3.647s user 0m0.080s sys 0m11.836s While with the patch I see: $ time ./test real 0m0.696s user 0m0.040s sys 0m1.152s So I added a pthread_create benchtest (thread_create) which checks the thread creation latency. As with the simple benchtest, I saw improvements in thread creation on all architectures on which I tested the change. Checked on x86_64-linux-gnu, i686-linux-gnu, aarch64-linux-gnu, arm-linux-gnueabihf, powerpc64le-linux-gnu, sparc64-linux-gnu, and sparcv9-linux-gnu. [BZ #18988] * benchtests/thread_create-inputs: New file. * benchtests/thread_create-source.c: Likewise. * support/xpthread_attr_setguardsize.c: Likewise. * support/Makefile (libsupport-routines): Add xpthread_attr_setguardsize object. * support/xthread.h: Add xpthread_attr_setguardsize prototype. * benchtests/Makefile (bench-pthread): Add thread_create. * nptl/allocatestack.c (allocate_stack): Call mmap with PROT_NONE and then mprotect the required area.
This commit is contained in:
parent
5c3e322d3b
commit
0edbf12301
15
ChangeLog
15
ChangeLog
@ -1,3 +1,18 @@
|
||||
2017-06-14 Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
||||
|
||||
[BZ #18988]
|
||||
* benchtests/thread_create-inputs: New file.
|
||||
* benchtests/thread_create-source.c: Likewise.
|
||||
* support/xpthread_attr_setguardsize.c: Likewise.
|
||||
* support/Makefile (libsupport-routines): Add
|
||||
xpthread_attr_setguardsize object.
|
||||
* support/xthread.h: Add xpthread_attr_setguardsize prototype.
|
||||
* benchtests/Makefile (bench-pthread): Add thread_create.
|
||||
* nptl/allocatestack.c (allocate_stack): Call mmap with PROT_NONE and
|
||||
then mprotect the required area.
|
||||
(guard_position): New function.
|
||||
(setup_stack_prot): Likewise.
|
||||
|
||||
2017-06-14 H.J. Lu <hongjiu.lu@intel.com>
|
||||
Erich Elsen <eriche@google.com>
|
||||
Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
||||
|
@ -25,7 +25,7 @@ bench-math := acos acosh asin asinh atan atanh cos cosh exp exp2 log log2 \
|
||||
modf pow rint sin sincos sinh sqrt tan tanh fmin fmax fminf \
|
||||
fmaxf
|
||||
|
||||
bench-pthread := pthread_once
|
||||
bench-pthread := pthread_once thread_create
|
||||
|
||||
bench-string := ffs ffsll
|
||||
|
||||
|
14
benchtests/thread_create-inputs
Normal file
14
benchtests/thread_create-inputs
Normal file
@ -0,0 +1,14 @@
|
||||
## args: int:size_t:size_t
|
||||
## init: thread_create_init
|
||||
## includes: pthread.h
|
||||
## include-sources: thread_create-source.c
|
||||
|
||||
## name: stack=1024,guard=1
|
||||
32, 1024, 1
|
||||
## name: stack=1024,guard=2
|
||||
32, 1024, 2
|
||||
|
||||
## name: stack=2048,guard=1
|
||||
32, 2048, 1
|
||||
## name: stack=2048,guard=2
|
||||
32, 2048, 2
|
58
benchtests/thread_create-source.c
Normal file
58
benchtests/thread_create-source.c
Normal file
@ -0,0 +1,58 @@
|
||||
/* Measure pthread_create thread creation with different stack
|
||||
and guard sizes.
|
||||
|
||||
Copyright (C) 2017 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <support/xthread.h>
|
||||
|
||||
static size_t pgsize;
|
||||
|
||||
static void
|
||||
thread_create_init (void)
|
||||
{
|
||||
pgsize = sysconf (_SC_PAGESIZE);
|
||||
}
|
||||
|
||||
/* Thread start routine that performs no work: ARG is ignored and the
   thread finishes immediately with a NULL result.  */
static void *
thread_dummy (void *arg)
{
  void *const result = NULL;

  return result;
}
|
||||
|
||||
static void
|
||||
thread_create (int nthreads, size_t stacksize, size_t guardsize)
|
||||
{
|
||||
pthread_attr_t attr;
|
||||
xpthread_attr_init (&attr);
|
||||
|
||||
stacksize = stacksize * pgsize;
|
||||
guardsize = guardsize * pgsize;
|
||||
|
||||
xpthread_attr_setstacksize (&attr, stacksize);
|
||||
xpthread_attr_setguardsize (&attr, guardsize);
|
||||
|
||||
pthread_t ts[nthreads];
|
||||
|
||||
for (int i = 0; i < nthreads; i++)
|
||||
ts[i] = xpthread_create (&attr, thread_dummy, NULL);
|
||||
|
||||
for (int i = 0; i < nthreads; i++)
|
||||
xpthread_join (ts[i]);
|
||||
}
|
@ -334,6 +334,43 @@ change_stack_perm (struct pthread *pd
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Compute the start address of the guard area of GUARDSIZE bytes for
   the stack block of SIZE bytes that begins at MEM.  PD is the thread
   descriptor; it is only consulted for upward-growing stacks.
   PAGESIZE_M1 is used as an alignment mask (presumably the page size
   minus one -- confirm against the caller).  */
static inline char *
__attribute ((always_inline))
guard_position (void *mem, size_t size, size_t guardsize, struct pthread *pd,
                size_t pagesize_m1)
{
  char *guard;

#ifdef NEED_SEPARATE_REGISTER_STACK
  /* The guard goes in the middle of the block: half of the non-guard
     space, with the offset masked by ~PAGESIZE_M1.  */
  const size_t middle = ((size - guardsize) / 2) & ~pagesize_m1;
  guard = (char *) mem + middle;
#elif _STACK_GROWS_DOWN
  /* The guard is the very beginning of the block.  */
  guard = mem;
#elif _STACK_GROWS_UP
  /* The guard lies GUARDSIZE bytes below PD, masked by ~PAGESIZE_M1.  */
  guard = (char *) (((uintptr_t) pd - guardsize) & ~pagesize_m1);
#endif

  return guard;
}
|
||||
|
||||
/* Based on a stack block of SIZE bytes at MEM that was allocated with
   PROT_NONE, apply the PROT protection flags to every part of the
   block except the guard area [GUARD, GUARD + GUARDSIZE), whose
   position is the one computed by guard_position.

   Return 0 on success, or the errno value left by the mprotect call
   that failed.  */
static inline int
setup_stack_prot (char *mem, size_t size, char *guard, size_t guardsize,
                  const int prot)
{
  char *guardend = guard + guardsize;
#if _STACK_GROWS_DOWN && !defined (NEED_SEPARATE_REGISTER_STACK)
  /* As defined at guard_position, for architectures with a downward
     stack and no separate register stack the guard page is always at
     the start of the allocated area, so a single mprotect covering
     everything after the guard is enough.  */
  if (mprotect (guardend, size - guardsize, prot) != 0)
    return errno;
#else
  /* The guard is not necessarily at the start of the block: with
     NEED_SEPARATE_REGISTER_STACK guard_position places it in the
     middle (even when _STACK_GROWS_DOWN is also set), and for upward
     stacks it is placed relative to the thread descriptor.  Both the
     region before the guard and the region after it need PROT.  */
  size_t mprots1 = (uintptr_t) guard - (uintptr_t) mem;
  if (mprotect (mem, mprots1, prot) != 0)
    return errno;
  size_t mprots2 = ((uintptr_t) mem + size) - (uintptr_t) guardend;
  if (mprotect (guardend, mprots2, prot) != 0)
    return errno;
#endif
  return 0;
}
|
||||
|
||||
/* Returns a usable stack for a new thread either by allocating a
|
||||
new stack or reusing a cached stack of sufficient size.
|
||||
@ -490,7 +527,10 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
|
||||
size += pagesize_m1 + 1;
|
||||
#endif
|
||||
|
||||
mem = mmap (NULL, size, prot,
|
||||
/* If a guard page is required, avoid committing memory by first
|
||||
allocate with PROT_NONE and then reserve with required permission
|
||||
excluding the guard page. */
|
||||
mem = mmap (NULL, size, (guardsize == 0) ? prot : PROT_NONE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
|
||||
|
||||
if (__glibc_unlikely (mem == MAP_FAILED))
|
||||
@ -510,9 +550,24 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
|
||||
- TLS_PRE_TCB_SIZE);
|
||||
#endif
|
||||
|
||||
/* Now mprotect the required region excluding the guard area. */
|
||||
if (__glibc_likely (guardsize > 0))
|
||||
{
|
||||
char *guard = guard_position (mem, size, guardsize, pd,
|
||||
pagesize_m1);
|
||||
if (setup_stack_prot (mem, size, guard, guardsize, prot) != 0)
|
||||
{
|
||||
munmap (mem, size);
|
||||
return errno;
|
||||
}
|
||||
}
|
||||
|
||||
/* Remember the stack-related values. */
|
||||
pd->stackblock = mem;
|
||||
pd->stackblock_size = size;
|
||||
/* Update guardsize for newly allocated guardsize to avoid
|
||||
an mprotect in guard resize below. */
|
||||
pd->guardsize = guardsize;
|
||||
|
||||
/* We allocated the first block thread-specific data array.
|
||||
This address will not change for the lifetime of this
|
||||
@ -593,13 +648,8 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
|
||||
/* Create or resize the guard area if necessary. */
|
||||
if (__glibc_unlikely (guardsize > pd->guardsize))
|
||||
{
|
||||
#ifdef NEED_SEPARATE_REGISTER_STACK
|
||||
char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
|
||||
#elif _STACK_GROWS_DOWN
|
||||
char *guard = mem;
|
||||
#elif _STACK_GROWS_UP
|
||||
char *guard = (char *) (((uintptr_t) pd - guardsize) & ~pagesize_m1);
|
||||
#endif
|
||||
char *guard = guard_position (mem, size, guardsize, pd,
|
||||
pagesize_m1);
|
||||
if (mprotect (guard, guardsize, PROT_NONE) != 0)
|
||||
{
|
||||
mprot_error:
|
||||
|
@ -82,6 +82,7 @@ libsupport-routines = \
|
||||
xpthread_attr_init \
|
||||
xpthread_attr_setdetachstate \
|
||||
xpthread_attr_setstacksize \
|
||||
xpthread_attr_setguardsize \
|
||||
xpthread_barrier_destroy \
|
||||
xpthread_barrier_init \
|
||||
xpthread_barrier_wait \
|
||||
|
26
support/xpthread_attr_setguardsize.c
Normal file
26
support/xpthread_attr_setguardsize.c
Normal file
@ -0,0 +1,26 @@
|
||||
/* pthread_attr_setguardsize with error checking.
|
||||
Copyright (C) 2017 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <support/xthread.h>
|
||||
|
||||
/* Set the guard size stored in ATTR to GUARDSIZE by calling
   pthread_attr_setguardsize, and hand the result to
   xpthread_check_return together with the name of the wrapped
   function so that a failure is reported against the right call.  */
void
xpthread_attr_setguardsize (pthread_attr_t *attr, size_t guardsize)
{
  xpthread_check_return ("pthread_attr_setguardsize",
                         pthread_attr_setguardsize (attr, guardsize));
}
|
@ -67,6 +67,8 @@ void xpthread_attr_setdetachstate (pthread_attr_t *attr,
|
||||
int detachstate);
|
||||
void xpthread_attr_setstacksize (pthread_attr_t *attr,
|
||||
size_t stacksize);
|
||||
void xpthread_attr_setguardsize (pthread_attr_t *attr,
|
||||
size_t guardsize);
|
||||
|
||||
/* This function returns non-zero if pthread_barrier_wait returned
|
||||
PTHREAD_BARRIER_SERIAL_THREAD. */
|
||||
|
Loading…
Reference in New Issue
Block a user