nptl: Eliminate the __static_tls_size, __static_tls_align_m1 variables

Use the __nptl_tls_static_size_for_stack inline function instead,
and the GLRO (dl_tls_static_align) value directly.

The computation of GLRO (dl_tls_static_align) in
_dl_determine_tlsoffset ensures that the alignment is at least
TLS_TCB_ALIGN, which is at least STACK_ALIGN (see allocate_stack).
Therefore, the additional rounding-up step is removed.
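
For reference, the helper that replaces the two variables (added to
nptl-stack.h in this commit, see the hunk below) boils down to a
single rounding step:

  /* Compute the size of the static TLS area based on data from the
     dynamic loader.  */
  static inline size_t
  __nptl_tls_static_size_for_stack (void)
  {
    return roundup (GLRO (dl_tls_static_size), GLRO (dl_tls_static_align));
  }

Because GLRO (dl_tls_static_align) is at least STACK_ALIGN, the old
extra roundup to STACK_ALIGN in __pthread_initialize_minimal_internal
is no longer needed.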

Also move the initialization of the default stack size from
__pthread_initialize_minimal_internal to __pthread_early_init.
This introduces an extra system call during single-threaded startup,
but it simplifies the initialization sequence.  No locking is
needed around the writes to __default_pthread_attr because the
process is single-threaded at this point.
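
Condensed from the __pthread_early_init hunk below, the moved code is
essentially:

  struct rlimit limit;
  if (__getrlimit (RLIMIT_STACK, &limit) != 0
      || limit.rlim_cur == RLIM_INFINITY)
    /* No usable system limit; fall back to the architecture default.  */
    limit.rlim_cur = ARCH_STACK_DEFAULT_SIZE;
  else if (limit.rlim_cur < PTHREAD_STACK_MIN)
    limit.rlim_cur = PTHREAD_STACK_MIN;
  /* (The full hunk also enforces the allocate_stack minimum and rounds
     the limit up to the page size.)  */
  __default_pthread_attr.internal.stacksize = limit.rlim_cur;
  __default_pthread_attr.internal.guardsize = GLRO (dl_pagesize);

The lll_lock/lll_unlock pair that previously protected these two
stores is dropped, since no second thread can exist this early.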

Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
Florian Weimer 2021-05-21 22:35:00 +02:00
parent 2f69522d46
commit d03511f48f
6 changed files with 58 additions and 61 deletions


@@ -386,8 +386,9 @@ allocate_dtv (void *result)
   return result;
 }
 
-/* Get size and alignment requirements of the static TLS block.  */
+/* Get size and alignment requirements of the static TLS block.  This
+   function is no longer used by glibc itself, but the GCC sanitizers
+   use it despite the GLIBC_PRIVATE status.  */
 void
 _dl_get_tls_static_info (size_t *sizep, size_t *alignp)
 {


@@ -254,6 +254,8 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
   struct pthread *pd;
   size_t size;
   size_t pagesize_m1 = __getpagesize () - 1;
+  size_t tls_static_size_for_stack = __nptl_tls_static_size_for_stack ();
+  size_t tls_static_align_m1 = GLRO (dl_tls_static_align) - 1;
 
   assert (powerof2 (pagesize_m1 + 1));
   assert (TCB_ALIGNMENT >= STACK_ALIGN);
@@ -284,17 +286,18 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
       /* If the user also specified the size of the stack make sure it
         is large enough.  */
       if (attr->stacksize != 0
-         && attr->stacksize < (__static_tls_size + MINIMAL_REST_STACK))
+         && attr->stacksize < (tls_static_size_for_stack
+                               + MINIMAL_REST_STACK))
        return EINVAL;
 
       /* Adjust stack size for alignment of the TLS block.  */
 #if TLS_TCB_AT_TP
       adj = ((uintptr_t) stackaddr - TLS_TCB_SIZE)
-           & __static_tls_align_m1;
+           & tls_static_align_m1;
       assert (size > adj + TLS_TCB_SIZE);
 #elif TLS_DTV_AT_TP
-      adj = ((uintptr_t) stackaddr - __static_tls_size)
-           & __static_tls_align_m1;
+      adj = ((uintptr_t) stackaddr - tls_static_size_for_stack)
+           & tls_static_align_m1;
       assert (size > adj);
 #endif
@@ -307,7 +310,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
                                - TLS_TCB_SIZE - adj);
 #elif TLS_DTV_AT_TP
       pd = (struct pthread *) (((uintptr_t) stackaddr
-                               - __static_tls_size - adj)
+                               - tls_static_size_for_stack - adj)
                                - TLS_PRE_TCB_SIZE);
 #endif
@@ -366,7 +369,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
                    | ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : 0));
 
       /* Adjust the stack size for alignment.  */
-      size &= ~__static_tls_align_m1;
+      size &= ~tls_static_align_m1;
       assert (size != 0);
 
       /* Make sure the size of the stack is enough for the guard and
@@ -385,7 +388,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
        /* Arithmetic overflow.  */
        return EINVAL;
       size += guardsize;
-      if (__builtin_expect (size < ((guardsize + __static_tls_size
+      if (__builtin_expect (size < ((guardsize + tls_static_size_for_stack
                                      + MINIMAL_REST_STACK + pagesize_m1)
                                     & ~pagesize_m1),
                             0))
@@ -414,11 +417,11 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
 #if TLS_TCB_AT_TP
       pd = (struct pthread *) ((((uintptr_t) mem + size)
                                 - TLS_TCB_SIZE)
-                               & ~__static_tls_align_m1);
+                               & ~tls_static_align_m1);
 #elif TLS_DTV_AT_TP
       pd = (struct pthread *) ((((uintptr_t) mem + size
-                                 - __static_tls_size)
-                                & ~__static_tls_align_m1)
+                                 - tls_static_size_for_stack)
+                                & ~tls_static_align_m1)
                                - TLS_PRE_TCB_SIZE);
 #endif
@@ -602,7 +605,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
 # if TLS_TCB_AT_TP
       /* The stack begins before the TCB and the static TLS block.  */
-      stacktop = ((char *) (pd + 1) - __static_tls_size);
+      stacktop = ((char *) (pd + 1) - tls_static_size_for_stack);
 # elif TLS_DTV_AT_TP
       stacktop = (char *) (pd - 1);
 # endif


@@ -36,10 +36,7 @@
 #include <kernel-features.h>
 #include <libc-pointer-arith.h>
 #include <pthread_mutex_conf.h>
-
-/* Size and alignment of static TLS block.  */
-size_t __static_tls_size;
-size_t __static_tls_align_m1;
+#include <nptl-stack.h>
 
 /* Version of the library, used in libthread_db to detect mismatches.  */
 static const char nptl_version[] __attribute_used__ = VERSION;
@@ -47,44 +44,6 @@ static const char nptl_version[] __attribute_used__ = VERSION;
 void
 __pthread_initialize_minimal_internal (void)
 {
-  /* Get the size of the static and alignment requirements for the TLS
-     block.  */
-  size_t static_tls_align;
-  _dl_get_tls_static_info (&__static_tls_size, &static_tls_align);
-
-  /* Make sure the size takes all the alignments into account.  */
-  if (STACK_ALIGN > static_tls_align)
-    static_tls_align = STACK_ALIGN;
-  __static_tls_align_m1 = static_tls_align - 1;
-
-  __static_tls_size = roundup (__static_tls_size, static_tls_align);
-
-  /* Determine the default allowed stack size.  This is the size used
-     in case the user does not specify one.  */
-  struct rlimit limit;
-  if (__getrlimit (RLIMIT_STACK, &limit) != 0
-      || limit.rlim_cur == RLIM_INFINITY)
-    /* The system limit is not usable.  Use an architecture-specific
-       default.  */
-    limit.rlim_cur = ARCH_STACK_DEFAULT_SIZE;
-  else if (limit.rlim_cur < PTHREAD_STACK_MIN)
-    /* The system limit is unusably small.
-       Use the minimal size acceptable.  */
-    limit.rlim_cur = PTHREAD_STACK_MIN;
-
-  /* Make sure it meets the minimum size that allocate_stack
-     (allocatestack.c) will demand, which depends on the page size.  */
-  const uintptr_t pagesz = GLRO(dl_pagesize);
-  const size_t minstack = pagesz + __static_tls_size + MINIMAL_REST_STACK;
-  if (limit.rlim_cur < minstack)
-    limit.rlim_cur = minstack;
-
-  /* Round the resource limit up to page size.  */
-  limit.rlim_cur = ALIGN_UP (limit.rlim_cur, pagesz);
-  lll_lock (__default_pthread_attr_lock, LLL_PRIVATE);
-  __default_pthread_attr.internal.stacksize = limit.rlim_cur;
-  __default_pthread_attr.internal.guardsize = GLRO (dl_pagesize);
-  lll_unlock (__default_pthread_attr_lock, LLL_PRIVATE);
 }
 strong_alias (__pthread_initialize_minimal_internal,
               __pthread_initialize_minimal)
@@ -101,5 +60,6 @@ strong_alias (__pthread_initialize_minimal_internal,
 size_t
 __pthread_get_minstack (const pthread_attr_t *attr)
 {
-  return GLRO(dl_pagesize) + __static_tls_size + PTHREAD_STACK_MIN;
+  return (GLRO(dl_pagesize) + __nptl_tls_static_size_for_stack ()
+          + PTHREAD_STACK_MIN);
 }


@@ -20,7 +20,8 @@
 #ifndef _NPTL_STACK_H
 #define _NPTL_STACK_H
 
-#include <descr.h>
+#include <nptl/descr.h>
+#include <ldsodefs.h>
 #include <list.h>
 #include <stdbool.h>
@@ -47,4 +48,12 @@ libc_hidden_proto (__nptl_deallocate_stack)
 /* Free stacks until cache size is lower than LIMIT.  */
 void __nptl_free_stacks (size_t limit) attribute_hidden;
 
+/* Compute the size of the static TLS area based on data from the
+   dynamic loader.  */
+static inline size_t
+__nptl_tls_static_size_for_stack (void)
+{
+  return roundup (GLRO (dl_tls_static_size), GLRO (dl_tls_static_align));
+}
+
 #endif /* _NPTL_STACK_H */


@@ -205,10 +205,6 @@ libc_hidden_proto (__default_pthread_attr_lock)
 /* Called from __libc_freeres to deallocate the default attribute.  */
 extern void __default_pthread_attr_freeres (void) attribute_hidden;
 
-/* Size and alignment of static TLS block.  */
-extern size_t __static_tls_size attribute_hidden;
-extern size_t __static_tls_align_m1 attribute_hidden;
-
 /* Attribute handling.  */
 extern struct pthread_attr *__attr_list attribute_hidden;
 extern int __attr_list_lock attribute_hidden;


@@ -19,12 +19,40 @@
 #ifndef _PTHREAD_EARLY_INIT_H
 #define _PTHREAD_EARLY_INIT_H 1
 
+#include <nptl/nptl-stack.h>
 #include <nptl/pthreadP.h>
 #include <pthread_mutex_conf.h>
+#include <sys/resource.h>
 
 static inline void
 __pthread_early_init (void)
 {
+  /* Determine the default allowed stack size.  This is the size used
+     in case the user does not specify one.  */
+  struct rlimit limit;
+  if (__getrlimit (RLIMIT_STACK, &limit) != 0
+      || limit.rlim_cur == RLIM_INFINITY)
+    /* The system limit is not usable.  Use an architecture-specific
+       default.  */
+    limit.rlim_cur = ARCH_STACK_DEFAULT_SIZE;
+  else if (limit.rlim_cur < PTHREAD_STACK_MIN)
+    /* The system limit is unusably small.
+       Use the minimal size acceptable.  */
+    limit.rlim_cur = PTHREAD_STACK_MIN;
+
+  /* Make sure it meets the minimum size that allocate_stack
+     (allocatestack.c) will demand, which depends on the page size.  */
+  const uintptr_t pagesz = GLRO(dl_pagesize);
+  const size_t minstack = (pagesz + __nptl_tls_static_size_for_stack ()
+                           + MINIMAL_REST_STACK);
+  if (limit.rlim_cur < minstack)
+    limit.rlim_cur = minstack;
+
+  /* Round the resource limit up to page size.  */
+  limit.rlim_cur = ALIGN_UP (limit.rlim_cur, pagesz);
+  __default_pthread_attr.internal.stacksize = limit.rlim_cur;
+  __default_pthread_attr.internal.guardsize = GLRO (dl_pagesize);
+
 #if HAVE_TUNABLES
   __pthread_tunables_init ();
 #endif