mirror of
https://sourceware.org/git/glibc.git
synced 2025-01-05 09:01:07 +00:00
0edbf12301
The current allocate_stack logic for creating stacks is to first mmap all the required memory with the desired memory protection and then mprotect the guard area with PROT_NONE if required. Although it works as expected, it pessimizes the allocation because it requires the kernel to actually increase the commit charge (it counts against the physical/swap memory available to the system). The only issue is how to actually check this change, since the side effects are really Linux specific and actually accounting for them would require a kernel-specific test that parses system-wide information. On the kernel I checked, /proc/self/statm does not show any meaningful difference for vmm and/or rss before and after thread creation. I could only see really meaningful information by checking the system-wide /proc/meminfo between thread creations: MemFree, MemAvailable, and Committed_AS show a large difference without the patch. I think trying to use this kind of information in a testcase is fragile. The BZ#18988 report shows that the committed pages are easily seen with mlockall (MCL_FUTURE) (which locks all pages that become mapped in the process); however, a more straightforward testcase shows that pthread_create could be faster using this patch: -- static const int inner_count = 256; static const int outer_count = 128; static void *thread1(void *arg) { return NULL; } static void *sleeper(void *arg) { pthread_t ts[inner_count]; for (int i = 0; i < inner_count; i++) pthread_create (&ts[i], &a, thread1, NULL); for (int i = 0; i < inner_count; i++) pthread_join (ts[i], NULL); return NULL; } int main(void) { pthread_attr_init(&a); pthread_attr_setguardsize(&a, 1<<20); pthread_attr_setstacksize(&a, 1134592); pthread_t ts[outer_count]; for (int i = 0; i < outer_count; i++) pthread_create(&ts[i], &a, sleeper, NULL); for (int i = 0; i < outer_count; i++) pthread_join(ts[i], NULL); assert(r == 0); } return 0; } -- On x86_64 (4.4.0-45-generic, gcc 5.4.0) running the small benchtests I see: $ time ./test real
0m3.647s user 0m0.080s sys 0m11.836s While with the patch I see: $ time ./test real 0m0.696s user 0m0.040s sys 0m1.152s So I added a pthread_create benchtest (thread_create) which checks the thread creation latency. As with the simple benchtests, I saw improvements in thread creation on all architectures on which I tested the change. Checked on x86_64-linux-gnu, i686-linux-gnu, aarch64-linux-gnu, arm-linux-gnueabihf, powerpc64le-linux-gnu, sparc64-linux-gnu, and sparcv9-linux-gnu. [BZ #18988] * benchtests/thread_create-inputs: New file. * benchtests/thread_create-source.c: Likewise. * support/xpthread_attr_setguardsize.c: Likewise. * support/Makefile (libsupport-routines): Add xpthread_attr_setguardsize object. * support/xthread.h: Add xpthread_attr_setguardsize prototype. * benchtests/Makefile (bench-pthread): Add thread_create. * nptl/allocatestack.c (allocate_stack): Call mmap with PROT_NONE and then mprotect the required area.
80 lines
3.4 KiB
C
80 lines
3.4 KiB
C
/* Support functionality for using threads.
|
|
Copyright (C) 2016-2017 Free Software Foundation, Inc.
|
|
This file is part of the GNU C Library.
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with the GNU C Library; if not, see
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
#ifndef SUPPORT_THREAD_H
|
|
#define SUPPORT_THREAD_H
|
|
|
|
#include <pthread.h>
|
|
#include <sys/cdefs.h>
|
|
|
|
__BEGIN_DECLS
|
|
|
|
/* Terminate the process (with exit status 0) after SECONDS have
|
|
elapsed, from a helper thread. The process is terminated with the
|
|
exit function, so atexit handlers are executed. */
|
|
void delayed_exit (int seconds);
|
|
|
|
/* Terminate the process (with exit status 1) if VALUE is not zero.
|
|
In that case, print a failure message to standard output mentioning
|
|
FUNCTION. The process is terminated with the exit function, so
|
|
atexit handlers are executed. */
|
|
void xpthread_check_return (const char *function, int value);
|
|
|
|
/* The following functions call the corresponding libpthread functions
|
|
and terminate the process on error. */
|
|
|
|
void xpthread_barrier_init (pthread_barrier_t *barrier,
|
|
pthread_barrierattr_t *attr, unsigned int count);
|
|
void xpthread_barrier_destroy (pthread_barrier_t *barrier);
|
|
void xpthread_mutexattr_destroy (pthread_mutexattr_t *);
|
|
void xpthread_mutexattr_init (pthread_mutexattr_t *);
|
|
void xpthread_mutexattr_setprotocol (pthread_mutexattr_t *, int);
|
|
void xpthread_mutexattr_setpshared (pthread_mutexattr_t *, int);
|
|
void xpthread_mutexattr_setrobust (pthread_mutexattr_t *, int);
|
|
void xpthread_mutexattr_settype (pthread_mutexattr_t *, int);
|
|
void xpthread_mutex_init (pthread_mutex_t *, const pthread_mutexattr_t *);
|
|
void xpthread_mutex_destroy (pthread_mutex_t *);
|
|
void xpthread_mutex_lock (pthread_mutex_t *mutex);
|
|
void xpthread_mutex_unlock (pthread_mutex_t *mutex);
|
|
void xpthread_mutex_consistent (pthread_mutex_t *);
|
|
void xpthread_spin_lock (pthread_spinlock_t *lock);
|
|
void xpthread_spin_unlock (pthread_spinlock_t *lock);
|
|
void xpthread_cond_wait (pthread_cond_t * cond, pthread_mutex_t * mutex);
|
|
/* Wrapper for pthread_create: start a thread running THREAD_FUNC with CLOSURE as its argument, using the attributes in ATTR.  Like the other wrappers declared here, it terminates the process on error.  NOTE(review): the returned pthread_t is presumably the identifier of the new thread; ATTR is presumably allowed to be NULL for default attributes -- confirm against the implementation.  */
pthread_t xpthread_create (pthread_attr_t *attr,
|
|
void *(*thread_func) (void *), void *closure);
|
|
void xpthread_detach (pthread_t thr);
|
|
void xpthread_cancel (pthread_t thr);
|
|
/* Wrapper for pthread_join on THR; terminates the process on error.  NOTE(review): the returned pointer is presumably the value the joined thread exited with -- confirm against the implementation.  */
void *xpthread_join (pthread_t thr);
|
|
void xpthread_once (pthread_once_t *guard, void (*func) (void));
|
|
void xpthread_attr_destroy (pthread_attr_t *attr);
|
|
void xpthread_attr_init (pthread_attr_t *attr);
|
|
void xpthread_attr_setdetachstate (pthread_attr_t *attr,
|
|
int detachstate);
|
|
void xpthread_attr_setstacksize (pthread_attr_t *attr,
|
|
size_t stacksize);
|
|
/* Wrapper for pthread_attr_setguardsize: set the guard size stored in ATTR to GUARDSIZE, terminating the process on error.  */
void xpthread_attr_setguardsize (pthread_attr_t *attr,
|
|
size_t guardsize);
|
|
|
|
/* This function returns non-zero if pthread_barrier_wait returned
|
|
PTHREAD_BARRIER_SERIAL_THREAD. */
|
|
int xpthread_barrier_wait (pthread_barrier_t *barrier);
|
|
|
|
__END_DECLS
|
|
|
|
#endif /* SUPPORT_THREAD_H */
|