glibc/support/Makefile
Adhemerval Zanella 0edbf12301 nptl: Invert the mmap/mprotect logic on allocated stacks (BZ#18988)
The current allocate_stack logic for creating stacks is to first mmap
all the required memory with the desired protection and then mprotect
the guard area with PROT_NONE if required.  Although it works as
expected, it pessimizes the allocation because it requires the kernel
to actually increase the commit charge (it counts against the
physical/swap memory available to the system).  This patch inverts the
order: the whole region is first mmap'ed with PROT_NONE and only the
required area is then mprotect'ed with the needed permissions.

The only issue is how to actually check this change, since the
side-effects are really Linux specific and accounting for them would
require a kernel-specific test that parses system-wide information.  On
the kernel I checked, /proc/self/statm does not show any meaningful
difference for vmm and/or rss before and after thread creation.  I could
only see really meaningful information by checking the system-wide
/proc/meminfo between thread creations: MemFree, MemAvailable, and
Committed_AS show a large difference without the patch.  I think trying
to use this kind of information in a testcase is fragile.

The BZ#18988 report shows that the committed pages are easily seen with
mlockall (MCL_FUTURE) (which locks all pages that become mapped in the
process); however, a more straightforward testcase shows that
pthread_create could be faster using this patch:

--
#include <assert.h>
#include <pthread.h>
#include <stddef.h>

/* Number of threads each sleeper thread creates, and number of sleeper
   threads created by main.  */
static const int inner_count = 256;
static const int outer_count = 128;

/* Thread attributes shared by every pthread_create call below.  They are
   initialized once in main before the first thread is started.  */
static pthread_attr_t a;

/* Innermost thread body: returns immediately, so the run time is
   dominated by thread creation and teardown.  */
static
void *thread1(void *arg)
{
  return NULL;
}

/* Creates inner_count threads running thread1 with the shared
   attributes, then joins all of them.  */
static
void *sleeper(void *arg)
{
  pthread_t ts[inner_count];
  for (int i = 0; i < inner_count; i++)
    pthread_create (&ts[i], &a, thread1, NULL);
  for (int i = 0; i < inner_count; i++)
    pthread_join (ts[i], NULL);

  return NULL;
}

/* Configures a 1 MiB guard area and a 1134592-byte stack, starts
   outer_count sleeper threads and waits for each of them, checking
   that every join succeeds.  */
int main(void)
{
  pthread_attr_init(&a);
  pthread_attr_setguardsize(&a, 1<<20);
  pthread_attr_setstacksize(&a, 1134592);

  pthread_t ts[outer_count];
  for (int i = 0; i < outer_count; i++)
    pthread_create(&ts[i], &a, sleeper, NULL);
  for (int i = 0; i < outer_count; i++) {
    /* The original snippet asserted on an undeclared `r` and had an
       unbalanced closing brace; capture the join result explicitly.  */
    int r = pthread_join(ts[i], NULL);
    assert(r == 0);
  }
  return 0;
}

--

On x86_64 (4.4.0-45-generic, gcc 5.4.0) running the small benchtests
I see:

$ time ./test

real	0m3.647s
user	0m0.080s
sys	0m11.836s

While with the patch I see:

$ time ./test

real	0m0.696s
user	0m0.040s
sys	0m1.152s

So I added a pthread_create benchtest (thread_create) which checks
the thread creation latency.  As with the simple benchtest above, I saw
improvements in thread creation on all architectures on which I tested
the change.

Checked on x86_64-linux-gnu, i686-linux-gnu, aarch64-linux-gnu,
arm-linux-gnueabihf, powerpc64le-linux-gnu, sparc64-linux-gnu,
and sparcv9-linux-gnu.

	[BZ #18988]
	* benchtests/thread_create-inputs: New file.
	* benchtests/thread_create-source.c: Likewise.
	* support/xpthread_attr_setguardsize.c: Likewise.
	* support/Makefile (libsupport-routines): Add
	xpthread_attr_setguardsize object.
	* support/xthread.h: Add xpthread_attr_setguardsize prototype.
	* benchtests/Makefile (bench-pthread): Add thread_create.
	* nptl/allocatestack.c (allocate_stack): Call mmap with PROT_NONE and
	then mprotect the required area.
2017-06-14 17:22:35 -03:00

148 lines
3.6 KiB
Makefile

# Makefile for support library, used only at build and test time
# Copyright (C) 2016-2017 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
# The GNU C Library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
# The GNU C Library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public
# License along with the GNU C Library; if not, see
# <http://www.gnu.org/licenses/>.
# Name of this subdirectory; it is set before the shared build
# configuration is included.
subdir := support
include ../Makeconfig
# libsupport is the only extra library declared by this Makefile.  The
# same name is also placed in extra-libs-others and extra-libs-noinstall.
# NOTE(review): presumably this makes it part of the "others" build and
# keeps it out of the installed files -- confirm against Makerules.
extra-libs := libsupport
extra-libs-others = $(extra-libs)
extra-libs-noinstall := $(extra-libs)
# Object names that make up libsupport, one per line, in alphabetical
# order.  The last entry deliberately has no trailing backslash, so the
# assignment ends here even when the next line is not blank; otherwise
# the following statement would be absorbed into this list.
libsupport-routines = \
  check \
  check_addrinfo \
  check_dns_packet \
  check_hostent \
  check_netent \
  delayed_exit \
  ignore_stderr \
  oom_error \
  resolv_test \
  set_fortify_handler \
  support-xstat \
  support_become_root \
  support_can_chroot \
  support_capture_subprocess \
  support_capture_subprocess_check \
  support_enter_network_namespace \
  support_format_address_family \
  support_format_addrinfo \
  support_format_dns_packet \
  support_format_herrno \
  support_format_hostent \
  support_format_netent \
  support_isolate_in_subprocess \
  support_record_failure \
  support_run_diff \
  support_shared_allocate \
  support_test_main \
  support_test_verify_impl \
  support_write_file_string \
  temp_file \
  write_message \
  xaccept \
  xaccept4 \
  xasprintf \
  xbind \
  xcalloc \
  xchroot \
  xclose \
  xconnect \
  xdup2 \
  xfclose \
  xfopen \
  xfork \
  xgetsockname \
  xlisten \
  xmalloc \
  xmemstream \
  xmkdir \
  xmmap \
  xmunmap \
  xopen \
  xpipe \
  xpoll \
  xpthread_attr_destroy \
  xpthread_attr_init \
  xpthread_attr_setdetachstate \
  xpthread_attr_setguardsize \
  xpthread_attr_setstacksize \
  xpthread_barrier_destroy \
  xpthread_barrier_init \
  xpthread_barrier_wait \
  xpthread_cancel \
  xpthread_check_return \
  xpthread_cond_wait \
  xpthread_create \
  xpthread_detach \
  xpthread_join \
  xpthread_mutex_consistent \
  xpthread_mutex_destroy \
  xpthread_mutex_init \
  xpthread_mutex_lock \
  xpthread_mutex_unlock \
  xpthread_mutexattr_destroy \
  xpthread_mutexattr_init \
  xpthread_mutexattr_setprotocol \
  xpthread_mutexattr_setpshared \
  xpthread_mutexattr_setrobust \
  xpthread_mutexattr_settype \
  xpthread_once \
  xpthread_sigmask \
  xpthread_spin_lock \
  xpthread_spin_unlock \
  xrealloc \
  xrecvfrom \
  xsendto \
  xsetsockopt \
  xsocket \
  xstrdup \
  xwaitpid \
  xwrite
# Every libsupport routine is also listed as a static-only routine.
# NOTE(review): presumably this keeps the objects out of any shared
# library build -- confirm against Makerules.
libsupport-static-only-routines := $(libsupport-routines)
# Only build one variant of the library.
# The .os object suffix is always inhibited; when build-shared is "yes"
# the .o suffix is inhibited as well.
libsupport-inhibit-o := .os
ifeq ($(build-shared),yes)
libsupport-inhibit-o += .o
endif
# Tests for the support library itself.  The last entry has no trailing
# backslash so that the list ends here and cannot absorb the conditional
# that follows it.
tests = \
  README-testing \
  tst-support-namespace \
  tst-support_capture_subprocess \
  tst-support_format_dns_packet \
  tst-support_record_failure
# Only when run-built-tests is "yes": register a special test whose
# result file is produced by a shell script.  The script (first
# prerequisite, $<) is run with the common object prefix and the
# test-program environment wrappers as arguments, its output is written
# to the target, and $(evaluate-test) runs afterwards in the same shell
# command.  Recipe lines must begin with a tab character.
ifeq ($(run-built-tests),yes)
tests-special = \
  $(objpfx)tst-support_record_failure-2.out

$(objpfx)tst-support_record_failure-2.out: tst-support_record_failure-2.sh \
  $(objpfx)tst-support_record_failure
	$(SHELL) $< $(common-objpfx) '$(test-program-prefix-before-env)' \
	  '$(run-program-env)' '$(test-program-prefix-after-env)' \
	  > $@; \
	$(evaluate-test)
endif
# tst-support_format_dns_packet lists the built libresolv.so as a
# prerequisite.
# NOTE(review): presumably so the test is linked against the in-tree
# resolver library -- confirm against Rules.
$(objpfx)tst-support_format_dns_packet: $(common-objpfx)resolv/libresolv.so
# Shared rules for building and running the targets declared above.
include ../Rules