mirror of
https://sourceware.org/git/glibc.git
synced 2024-12-23 03:10:05 +00:00
0edbf12301
The current allocate_stack logic for creating stacks is to first mmap all the required memory with the desired memory protection and then mprotect the guard area with PROT_NONE if required. Although it works as expected, it pessimizes the allocation because it requires the kernel to actually increase the commit charge (it counts against the physical/swap memory available to the system). The only issue is how to actually check this change, since the side effects are really Linux-specific, and accounting for them would require kernel-specific tests that parse system-wide information. On the kernel I checked, /proc/self/statm does not show any meaningful difference for vmm and/or rss before and after thread creation. I could only see really meaningful information by checking the system-wide /proc/meminfo between thread creations: MemFree, MemAvailable, and Committed_AS show a large difference without the patch. I think trying to use this kind of information in a testcase is fragile. The BZ#18988 report shows that the committed pages are easily seen with mlockall (MCL_FUTURE) (which locks all pages that become mapped in the process); however, a more straightforward testcase shows that pthread_create could be faster using this patch: -- static const int inner_count = 256; static const int outer_count = 128; static void *thread1(void *arg) { return NULL; } static void *sleeper(void *arg) { pthread_t ts[inner_count]; for (int i = 0; i < inner_count; i++) pthread_create (&ts[i], &a, thread1, NULL); for (int i = 0; i < inner_count; i++) pthread_join (ts[i], NULL); return NULL; } int main(void) { pthread_attr_init(&a); pthread_attr_setguardsize(&a, 1<<20); pthread_attr_setstacksize(&a, 1134592); pthread_t ts[outer_count]; for (int i = 0; i < outer_count; i++) pthread_create(&ts[i], &a, sleeper, NULL); for (int i = 0; i < outer_count; i++) pthread_join(ts[i], NULL); assert(r == 0); } return 0; } -- On x86_64 (4.4.0-45-generic, gcc 5.4.0) running the small benchtests I see: $ time ./test real 
0m3.647s user 0m0.080s sys 0m11.836s While with the patch I see: $ time ./test real 0m0.696s user 0m0.040s sys 0m1.152s So I added a pthread_create benchtest (thread_create) which checks the thread creation latency. As with the simple benchtests, I saw improvements in thread creation on all architectures on which I tested the change. Checked on x86_64-linux-gnu, i686-linux-gnu, aarch64-linux-gnu, arm-linux-gnueabihf, powerpc64le-linux-gnu, sparc64-linux-gnu, and sparcv9-linux-gnu. [BZ #18988] * benchtests/thread_create-inputs: New file. * benchtests/thread_create-source.c: Likewise. * support/xpthread_attr_setguardsize.c: Likewise. * support/Makefile (libsupport-routines): Add xpthread_attr_setguardsize object. * support/xthread.h: Add xpthread_attr_setguardsize prototype. * benchtests/Makefile (bench-pthread): Add thread_create. * nptl/allocatestack.c (allocate_stack): Call mmap with PROT_NONE and then mprotect the required area. |
||
---|---|---|
.. | ||
capture_subprocess.h | ||
check_addrinfo.c | ||
check_dns_packet.c | ||
check_hostent.c | ||
check_netent.c | ||
check_nss.h | ||
check.c | ||
check.h | ||
delayed_exit.c | ||
format_nss.h | ||
ignore_stderr.c | ||
Makefile | ||
namespace.h | ||
oom_error.c | ||
README | ||
README-testing.c | ||
resolv_test.c | ||
resolv_test.h | ||
run_diff.h | ||
set_fortify_handler.c | ||
support_become_root.c | ||
support_can_chroot.c | ||
support_capture_subprocess_check.c | ||
support_capture_subprocess.c | ||
support_enter_network_namespace.c | ||
support_format_address_family.c | ||
support_format_addrinfo.c | ||
support_format_dns_packet.c | ||
support_format_herrno.c | ||
support_format_hostent.c | ||
support_format_netent.c | ||
support_isolate_in_subprocess.c | ||
support_record_failure.c | ||
support_run_diff.c | ||
support_shared_allocate.c | ||
support_test_main.c | ||
support_test_verify_impl.c | ||
support_write_file_string.c | ||
support-xstat.c | ||
support.h | ||
temp_file-internal.h | ||
temp_file.c | ||
temp_file.h | ||
test-driver.c | ||
test-driver.h | ||
tst-support_capture_subprocess.c | ||
tst-support_format_dns_packet.c | ||
tst-support_record_failure-2.sh | ||
tst-support_record_failure.c | ||
tst-support-namespace.c | ||
write_message.c | ||
xaccept4.c | ||
xaccept.c | ||
xasprintf.c | ||
xbind.c | ||
xcalloc.c | ||
xchroot.c | ||
xclose.c | ||
xconnect.c | ||
xdup2.c | ||
xfclose.c | ||
xfopen.c | ||
xfork.c | ||
xgetsockname.c | ||
xlisten.c | ||
xmalloc.c | ||
xmemstream.c | ||
xmemstream.h | ||
xmkdir.c | ||
xmmap.c | ||
xmunmap.c | ||
xopen.c | ||
xpipe.c | ||
xpoll.c | ||
xpthread_attr_destroy.c | ||
xpthread_attr_init.c | ||
xpthread_attr_setdetachstate.c | ||
xpthread_attr_setguardsize.c | ||
xpthread_attr_setstacksize.c | ||
xpthread_barrier_destroy.c | ||
xpthread_barrier_init.c | ||
xpthread_barrier_wait.c | ||
xpthread_cancel.c | ||
xpthread_check_return.c | ||
xpthread_cond_wait.c | ||
xpthread_create.c | ||
xpthread_detach.c | ||
xpthread_join.c | ||
xpthread_mutex_consistent.c | ||
xpthread_mutex_destroy.c | ||
xpthread_mutex_init.c | ||
xpthread_mutex_lock.c | ||
xpthread_mutex_unlock.c | ||
xpthread_mutexattr_destroy.c | ||
xpthread_mutexattr_init.c | ||
xpthread_mutexattr_setprotocol.c | ||
xpthread_mutexattr_setpshared.c | ||
xpthread_mutexattr_setrobust.c | ||
xpthread_mutexattr_settype.c | ||
xpthread_once.c | ||
xpthread_sigmask.c | ||
xpthread_spin_lock.c | ||
xpthread_spin_unlock.c | ||
xrealloc.c | ||
xrecvfrom.c | ||
xsendto.c | ||
xsetsockopt.c | ||
xsignal.h | ||
xsocket.c | ||
xsocket.h | ||
xstdio.h | ||
xstrdup.c | ||
xthread.h | ||
xunistd.h | ||
xwaitpid.c | ||
xwrite.c |
This subdirectory contains infrastructure which is not put into installed libraries, but may be linked into programs (installed or not) and tests. # Error-checking wrappers These wrappers test for error return codes and terminate the process on error. They are declared in these header files: * support.h * xsignal.h * xthread.h In general, new wrappers should be added to support.h if possible. However, support.h must remain fully compatible with C90 and therefore cannot include headers which use identifiers not reserved in C90. If the wrappers need additional types, additional headers such as signal.h need to be introduced. # Test framework The test framework provides a main program for tests, including a timeout for hanging tests. See README-testing.c for a minimal example, and test-driver.c for details on how to use it. The following header files provide related declarations: * check.h * temp_file.h * test-driver.h