mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-21 20:40:05 +00:00
461cab1de7
Linux 6.11 has getrandom() in vDSO. It operates on a thread-local opaque state allocated with mmap using flags specified by the vDSO. Multiple states are allocated at once, as many as fit into a page, and these are held in an array of available states to be doled out to each thread upon first use, and recycled when a thread terminates. As these states run low, more are allocated. To make this procedure async-signal-safe, a simple guard is used in the LSB of the opaque state address, falling back to the syscall if there's reentrancy contention. Also, _Fork() is handled by blocking signals on opaque state allocation (so _Fork() always sees a consistent state even if it interrupts a getrandom() call) and by iterating over the thread stack cache on reclaim_stack. Each opaque state will be in the free states list (grnd_alloc.states) or allocated to a running thread. The cancellation is handled by always using GRND_NONBLOCK flags while calling the vDSO, and falling back to the cancellable syscall if the kernel returns EAGAIN (would block). Since getrandom is not defined by POSIX and cancellation is supported as an extension, the cancellation is handled as 'may occur' instead of 'shall occur' [1], meaning that if vDSO does not block (the expected behavior) getrandom will not act as a cancellation entrypoint. It avoids a pthread_testcancel call on the fast path (different than 'shall occur' functions, like sem_wait()). It is currently enabled for x86_64, which is available in Linux 6.11, and aarch64, powerpc32, powerpc64, loongarch64, and s390x, which are available in Linux 6.12. Link: https://pubs.opengroup.org/onlinepubs/9799919799/nframe.html [1] Co-developed-by: Jason A. Donenfeld <Jason@zx2c4.com> Tested-by: Jason A. Donenfeld <Jason@zx2c4.com> # x86_64 Tested-by: Adhemerval Zanella <adhemerval.zanella@linaro.org> # x86_64, aarch64 Tested-by: Xi Ruoyao <xry111@xry111.site> # x86_64, aarch64, loongarch64 Tested-by: Stefan Liebler <stli@linux.ibm.com> # s390x
971 lines
35 KiB
C
971 lines
35 KiB
C
/* Copyright (C) 2002-2024 Free Software Foundation, Inc.
|
||
This file is part of the GNU C Library.
|
||
|
||
The GNU C Library is free software; you can redistribute it and/or
|
||
modify it under the terms of the GNU Lesser General Public
|
||
License as published by the Free Software Foundation; either
|
||
version 2.1 of the License, or (at your option) any later version.
|
||
|
||
The GNU C Library is distributed in the hope that it will be useful,
|
||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
Lesser General Public License for more details.
|
||
|
||
You should have received a copy of the GNU Lesser General Public
|
||
License along with the GNU C Library; if not, see
|
||
<https://www.gnu.org/licenses/>. */
|
||
|
||
#include <ctype.h>
|
||
#include <errno.h>
|
||
#include <stdbool.h>
|
||
#include <stdlib.h>
|
||
#include <string.h>
|
||
#include <stdint.h>
|
||
#include "pthreadP.h"
|
||
#include <hp-timing.h>
|
||
#include <ldsodefs.h>
|
||
#include <atomic.h>
|
||
#include <libc-diag.h>
|
||
#include <libc-internal.h>
|
||
#include <resolv.h>
|
||
#include <kernel-features.h>
|
||
#include <default-sched.h>
|
||
#include <futex-internal.h>
|
||
#include <tls-setup.h>
|
||
#include <rseq-internal.h>
|
||
#include "libioP.h"
|
||
#include <sys/single_threaded.h>
|
||
#include <version.h>
|
||
#include <clone_internal.h>
|
||
#include <futex-internal.h>
|
||
#include <getrandom-internal.h>
|
||
|
||
#include <shlib-compat.h>
|
||
|
||
#include <stap-probe.h>
|
||
|
||
|
||
/* Globally enabled events. */
|
||
extern td_thr_events_t __nptl_threads_events;
|
||
libc_hidden_proto (__nptl_threads_events)
|
||
td_thr_events_t __nptl_threads_events;
|
||
libc_hidden_data_def (__nptl_threads_events)
|
||
|
||
/* Pointer to descriptor with the last event. */
|
||
extern struct pthread *__nptl_last_event;
|
||
libc_hidden_proto (__nptl_last_event)
|
||
struct pthread *__nptl_last_event;
|
||
libc_hidden_data_def (__nptl_last_event)
|
||
|
||
#ifdef SHARED
|
||
/* This variable is used to access _rtld_global from libthread_db. If
|
||
GDB loads libpthread before ld.so, it is not possible to resolve
|
||
_rtld_global directly during libpthread initialization. */
|
||
struct rtld_global *__nptl_rtld_global = &_rtld_global;
|
||
#endif
|
||
|
||
/* Version of the library, used in libthread_db to detect mismatches. */
|
||
const char __nptl_version[] = VERSION;
|
||
|
||
/* This performs the initialization necessary when going from
|
||
single-threaded to multi-threaded mode for the first time. */
|
||
static void
|
||
late_init (void)
|
||
{
|
||
struct sigaction sa;
|
||
__sigemptyset (&sa.sa_mask);
|
||
|
||
/* Install the handle to change the threads' uid/gid. Use
|
||
SA_ONSTACK because the signal may be sent to threads that are
|
||
running with custom stacks. (This is less likely for
|
||
SIGCANCEL.) */
|
||
sa.sa_sigaction = __nptl_setxid_sighandler;
|
||
sa.sa_flags = SA_ONSTACK | SA_SIGINFO | SA_RESTART;
|
||
(void) __libc_sigaction (SIGSETXID, &sa, NULL);
|
||
|
||
/* The parent process might have left the signals blocked. Just in
|
||
case, unblock it. We reuse the signal mask in the sigaction
|
||
structure. It is already cleared. */
|
||
__sigaddset (&sa.sa_mask, SIGCANCEL);
|
||
__sigaddset (&sa.sa_mask, SIGSETXID);
|
||
INTERNAL_SYSCALL_CALL (rt_sigprocmask, SIG_UNBLOCK, &sa.sa_mask,
|
||
NULL, __NSIG_BYTES);
|
||
}
|
||
|
||
/* Code to allocate and deallocate a stack. */
|
||
#include "allocatestack.c"
|
||
|
||
/* CONCURRENCY NOTES:
|
||
|
||
Understanding who is the owner of the 'struct pthread' or 'PD'
|
||
(refers to the value of the 'struct pthread *pd' function argument)
|
||
is critically important in determining exactly which operations are
|
||
allowed and which are not and when, particularly when it comes to the
|
||
implementation of pthread_create, pthread_join, pthread_detach, and
|
||
other functions which all operate on PD.
|
||
|
||
The owner of PD is responsible for freeing the final resources
|
||
associated with PD, and may examine the memory underlying PD at any
|
||
point in time until it frees it back to the OS or to reuse by the
|
||
runtime.
|
||
|
||
The thread which calls pthread_create is called the creating thread.
|
||
The creating thread begins as the owner of PD.
|
||
|
||
During startup the new thread may examine PD in coordination with the
|
||
owner thread (which may be itself).
|
||
|
||
The four cases of ownership transfer are:
|
||
|
||
(1) Ownership of PD is released to the process (all threads may use it)
|
||
after the new thread starts in a joinable state
|
||
i.e. pthread_create returns a usable pthread_t.
|
||
|
||
(2) Ownership of PD is released to the new thread starting in a detached
|
||
state.
|
||
|
||
(3) Ownership of PD is dynamically released to a running thread via
|
||
pthread_detach.
|
||
|
||
(4) Ownership of PD is acquired by the thread which calls pthread_join.
|
||
|
||
Implementation notes:
|
||
|
||
The PD->stopped_start and thread_ran variables are used to determine
|
||
exactly which of the four ownership states we are in and therefore
|
||
what actions can be taken. For example after (2) we cannot read or
|
||
write from PD anymore since the thread may no longer exist and the
|
||
memory may be unmapped.
|
||
|
||
It is important to point out that PD->lock is being used both
|
||
similar to a one-shot semaphore and subsequently as a mutex. The
|
||
lock is taken in the parent to force the child to wait, and then the
|
||
child releases the lock. However, this semaphore-like effect is used
|
||
only for synchronizing the parent and child. After startup the lock
|
||
is used like a mutex to create a critical section during which a
|
||
single owner modifies the thread parameters.
|
||
|
||
The most complicated cases happen during thread startup:
|
||
|
||
(a) If the created thread is in a detached (PTHREAD_CREATE_DETACHED),
|
||
or joinable (default PTHREAD_CREATE_JOINABLE) state and
|
||
STOPPED_START is true, then the creating thread has ownership of
|
||
PD until the PD->lock is released by pthread_create. If any
|
||
errors occur we are in states (c) or (d) below.
|
||
|
||
(b) If the created thread is in a detached state
|
||
(PTHREAD_CREATE_DETACHED), and STOPPED_START is false, then the
|
||
creating thread has ownership of PD until it invokes the OS
|
||
kernel's thread creation routine. If this routine returns
|
||
without error, then the created thread owns PD; otherwise, see
|
||
(c) or (d) below.
|
||
|
||
(c) If either a joinable or detached thread setup failed and THREAD_RAN
|
||
is true, then the creating thread releases ownership to the new thread,
|
||
the created thread sees the failed setup through PD->setup_failed
|
||
member, releases the PD ownership, and exits. The creating thread will
|
||
be responsible for cleaning up the allocated resources. THREAD_RAN is
|
||
local to the creating thread and indicates whether thread creation or setup
|
||
has failed.
|
||
|
||
(d) If the thread creation failed and THREAD_RAN is false (meaning
|
||
ARCH_CLONE has failed), then the creating thread retains ownership
|
||
of PD and must clean up the allocated resources. No waiting for the new
|
||
thread is required because it never started.
|
||
|
||
The nptl_db interface:
|
||
|
||
The interface with nptl_db requires that we enqueue PD into a linked
|
||
list and then call a function which the debugger will trap. The PD
|
||
will then be dequeued and control returned to the thread. The caller
|
||
at the time must have ownership of PD and such ownership remains
|
||
after control returns to thread. The enqueued PD is removed from the
|
||
linked list by the nptl_db callback td_thr_event_getmsg. The debugger
|
||
must ensure that the thread does not resume execution, otherwise
|
||
ownership of PD may be lost and examining PD will not be possible.
|
||
|
||
Note that the GNU Debugger as of (December 10th 2015) commit
|
||
c2c2a31fdb228d41ce3db62b268efea04bd39c18 no longer uses
|
||
td_thr_event_getmsg and several other related nptl_db interfaces. The
|
||
principal reason for this is that nptl_db does not support non-stop
|
||
mode where other threads can run concurrently and modify runtime
|
||
structures currently in use by the debugger and the nptl_db
|
||
interface.
|
||
|
||
Axioms:
|
||
|
||
* The create_thread function can never set stopped_start to false.
|
||
* The created thread can read stopped_start but never write to it.
|
||
* The variable thread_ran is set some time after the OS thread
|
||
creation routine returns, how much time after the thread is created
|
||
is unspecified, but it should be as quickly as possible.
|
||
|
||
*/
|
||
|
||
/* CREATE THREAD NOTES:
|
||
|
||
create_thread must initialize PD->stopped_start. It should be true
|
||
if the STOPPED_START parameter is true, or if create_thread needs the
|
||
new thread to synchronize at startup for some other implementation
|
||
reason. If STOPPED_START will be true, then create_thread is obliged
|
||
to lock PD->lock before starting the thread. Then pthread_create
|
||
unlocks PD->lock which synchronizes-with create_thread in the
|
||
child thread which does an acquire/release of PD->lock as the last
|
||
action before calling the user entry point. The goal of all of this
|
||
is to ensure that the required initial thread attributes are applied
|
||
(by the creating thread) before the new thread runs user code. Note
|
||
that the functions pthread_getschedparam, pthread_setschedparam,
|
||
pthread_setschedprio, __pthread_tpp_change_priority, and
|
||
__pthread_current_priority reuse the same lock, PD->lock, for a
|
||
similar purpose e.g. synchronizing the setting of similar thread
|
||
attributes. These functions are never called before the thread is
|
||
created, so don't participate in startup synchronization, but given
|
||
that the lock is present already and in the unlocked state, reusing
|
||
it saves space.
|
||
|
||
The return value is zero for success or an errno code for failure.
|
||
If the return value is ENOMEM, that will be translated to EAGAIN,
|
||
so create_thread need not do that. On failure, *THREAD_RAN should
|
||
be set to true iff the thread actually started up but before calling
|
||
the user code (*PD->start_routine). */
|
||
|
||
static int _Noreturn start_thread (void *arg);
|
||
|
||
static int create_thread (struct pthread *pd, const struct pthread_attr *attr,
|
||
bool *stopped_start, void *stackaddr,
|
||
size_t stacksize, bool *thread_ran)
|
||
{
|
||
/* Determine whether the newly created threads has to be started
|
||
stopped since we have to set the scheduling parameters or set the
|
||
affinity. */
|
||
bool need_setaffinity = (attr != NULL && attr->extension != NULL
|
||
&& attr->extension->cpuset != 0);
|
||
if (attr != NULL
|
||
&& (__glibc_unlikely (need_setaffinity)
|
||
|| __glibc_unlikely ((attr->flags & ATTR_FLAG_NOTINHERITSCHED) != 0)))
|
||
*stopped_start = true;
|
||
|
||
pd->stopped_start = *stopped_start;
|
||
if (__glibc_unlikely (*stopped_start))
|
||
lll_lock (pd->lock, LLL_PRIVATE);
|
||
|
||
/* We rely heavily on various flags the CLONE function understands:
|
||
|
||
CLONE_VM, CLONE_FS, CLONE_FILES
|
||
These flags select semantics with shared address space and
|
||
file descriptors according to what POSIX requires.
|
||
|
||
CLONE_SIGHAND, CLONE_THREAD
|
||
This flag selects the POSIX signal semantics and various
|
||
other kinds of sharing (itimers, POSIX timers, etc.).
|
||
|
||
CLONE_SETTLS
|
||
The sixth parameter to CLONE determines the TLS area for the
|
||
new thread.
|
||
|
||
CLONE_PARENT_SETTID
|
||
The kernels writes the thread ID of the newly created thread
|
||
into the location pointed to by the fifth parameters to CLONE.
|
||
|
||
Note that it would be semantically equivalent to use
|
||
CLONE_CHILD_SETTID but it is be more expensive in the kernel.
|
||
|
||
CLONE_CHILD_CLEARTID
|
||
The kernels clears the thread ID of a thread that has called
|
||
sys_exit() in the location pointed to by the seventh parameter
|
||
to CLONE.
|
||
|
||
The termination signal is chosen to be zero which means no signal
|
||
is sent. */
|
||
const int clone_flags = (CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SYSVSEM
|
||
| CLONE_SIGHAND | CLONE_THREAD
|
||
| CLONE_SETTLS | CLONE_PARENT_SETTID
|
||
| CLONE_CHILD_CLEARTID
|
||
| 0);
|
||
|
||
TLS_DEFINE_INIT_TP (tp, pd);
|
||
|
||
struct clone_args args =
|
||
{
|
||
.flags = clone_flags,
|
||
.pidfd = (uintptr_t) &pd->tid,
|
||
.parent_tid = (uintptr_t) &pd->tid,
|
||
.child_tid = (uintptr_t) &pd->tid,
|
||
.stack = (uintptr_t) stackaddr,
|
||
.stack_size = stacksize,
|
||
.tls = (uintptr_t) tp,
|
||
};
|
||
int ret = __clone_internal (&args, &start_thread, pd);
|
||
if (__glibc_unlikely (ret == -1))
|
||
return errno;
|
||
|
||
/* It's started now, so if we fail below, we'll have to let it clean itself
|
||
up. */
|
||
*thread_ran = true;
|
||
|
||
/* Now we have the possibility to set scheduling parameters etc. */
|
||
if (attr != NULL)
|
||
{
|
||
/* Set the affinity mask if necessary. */
|
||
if (need_setaffinity)
|
||
{
|
||
assert (*stopped_start);
|
||
|
||
int res = INTERNAL_SYSCALL_CALL (sched_setaffinity, pd->tid,
|
||
attr->extension->cpusetsize,
|
||
attr->extension->cpuset);
|
||
if (__glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (res)))
|
||
return INTERNAL_SYSCALL_ERRNO (res);
|
||
}
|
||
|
||
/* Set the scheduling parameters. */
|
||
if ((attr->flags & ATTR_FLAG_NOTINHERITSCHED) != 0)
|
||
{
|
||
assert (*stopped_start);
|
||
|
||
int res = INTERNAL_SYSCALL_CALL (sched_setscheduler, pd->tid,
|
||
pd->schedpolicy, &pd->schedparam);
|
||
if (__glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (res)))
|
||
return INTERNAL_SYSCALL_ERRNO (res);
|
||
}
|
||
}
|
||
|
||
return 0;
|
||
}
|
||
|
||
/* Local function to start thread and handle cleanup. */
|
||
static int _Noreturn
|
||
start_thread (void *arg)
|
||
{
|
||
struct pthread *pd = arg;
|
||
|
||
/* We are either in (a) or (b), and in either case we either own PD already
|
||
(2) or are about to own PD (1), and so our only restriction would be that
|
||
we can't free PD until we know we have ownership (see CONCURRENCY NOTES
|
||
above). */
|
||
if (pd->stopped_start)
|
||
{
|
||
bool setup_failed = false;
|
||
|
||
/* Get the lock the parent locked to force synchronization. */
|
||
lll_lock (pd->lock, LLL_PRIVATE);
|
||
|
||
/* We have ownership of PD now, for detached threads with setup failure
|
||
we set it as joinable so the creating thread could synchronous join
|
||
and free any resource prior return to the pthread_create caller. */
|
||
setup_failed = pd->setup_failed == 1;
|
||
if (setup_failed)
|
||
pd->joinid = NULL;
|
||
|
||
/* And give it up right away. */
|
||
lll_unlock (pd->lock, LLL_PRIVATE);
|
||
|
||
if (setup_failed)
|
||
goto out;
|
||
}
|
||
|
||
/* Initialize resolver state pointer. */
|
||
__resp = &pd->res;
|
||
|
||
/* Initialize pointers to locale data. */
|
||
__ctype_init ();
|
||
|
||
/* Name the thread stack if kernel supports it. */
|
||
name_stack_maps (pd, true);
|
||
|
||
/* Register rseq TLS to the kernel. */
|
||
{
|
||
bool do_rseq = THREAD_GETMEM (pd, flags) & ATTR_FLAG_DO_RSEQ;
|
||
if (!rseq_register_current_thread (pd, do_rseq) && do_rseq)
|
||
__libc_fatal ("Fatal glibc error: rseq registration failed\n");
|
||
}
|
||
|
||
#ifndef __ASSUME_SET_ROBUST_LIST
|
||
if (__nptl_set_robust_list_avail)
|
||
#endif
|
||
{
|
||
/* This call should never fail because the initial call in init.c
|
||
succeeded. */
|
||
INTERNAL_SYSCALL_CALL (set_robust_list, &pd->robust_head,
|
||
sizeof (struct robust_list_head));
|
||
}
|
||
|
||
/* This is where the try/finally block should be created. For
|
||
compilers without that support we do use setjmp. */
|
||
struct pthread_unwind_buf unwind_buf;
|
||
|
||
int not_first_call;
|
||
DIAG_PUSH_NEEDS_COMMENT;
|
||
#if __GNUC_PREREQ (7, 0)
|
||
/* This call results in a -Wstringop-overflow warning because struct
|
||
pthread_unwind_buf is smaller than jmp_buf. setjmp and longjmp
|
||
do not use anything beyond the common prefix (they never access
|
||
the saved signal mask), so that is a false positive. */
|
||
DIAG_IGNORE_NEEDS_COMMENT (11, "-Wstringop-overflow=");
|
||
#endif
|
||
not_first_call = setjmp ((struct __jmp_buf_tag *) unwind_buf.cancel_jmp_buf);
|
||
DIAG_POP_NEEDS_COMMENT;
|
||
|
||
/* No previous handlers. NB: This must be done after setjmp since the
|
||
private space in the unwind jump buffer may overlap space used by
|
||
setjmp to store extra architecture-specific information which is
|
||
never used by the cancellation-specific __libc_unwind_longjmp.
|
||
|
||
The private space is allowed to overlap because the unwinder never
|
||
has to return through any of the jumped-to call frames, and thus
|
||
only a minimum amount of saved data need be stored, and for example,
|
||
need not include the process signal mask information. This is all
|
||
an optimization to reduce stack usage when pushing cancellation
|
||
handlers. */
|
||
unwind_buf.priv.data.prev = NULL;
|
||
unwind_buf.priv.data.cleanup = NULL;
|
||
|
||
/* Allow setxid from now onwards. */
|
||
if (__glibc_unlikely (atomic_exchange_acquire (&pd->setxid_futex, 0) == -2))
|
||
futex_wake (&pd->setxid_futex, 1, FUTEX_PRIVATE);
|
||
|
||
if (__glibc_likely (! not_first_call))
|
||
{
|
||
/* Store the new cleanup handler info. */
|
||
THREAD_SETMEM (pd, cleanup_jmp_buf, &unwind_buf);
|
||
|
||
internal_signal_restore_set (&pd->sigmask);
|
||
|
||
LIBC_PROBE (pthread_start, 3, (pthread_t) pd, pd->start_routine, pd->arg);
|
||
|
||
/* Run the code the user provided. */
|
||
void *ret;
|
||
if (pd->c11)
|
||
{
|
||
/* The function pointer of the c11 thread start is cast to an incorrect
|
||
type on __pthread_create_2_1 call, however it is casted back to correct
|
||
one so the call behavior is well-defined (it is assumed that pointers
|
||
to void are able to represent all values of int. */
|
||
int (*start)(void*) = (int (*) (void*)) pd->start_routine;
|
||
ret = (void*) (uintptr_t) start (pd->arg);
|
||
}
|
||
else
|
||
ret = pd->start_routine (pd->arg);
|
||
THREAD_SETMEM (pd, result, ret);
|
||
}
|
||
|
||
/* Call destructors for the thread_local TLS variables. */
|
||
call_function_static_weak (__call_tls_dtors);
|
||
|
||
/* Run the destructor for the thread-local data. */
|
||
__nptl_deallocate_tsd ();
|
||
|
||
/* Clean up any state libc stored in thread-local variables. */
|
||
__libc_thread_freeres ();
|
||
|
||
/* Report the death of the thread if this is wanted. */
|
||
if (__glibc_unlikely (pd->report_events))
|
||
{
|
||
/* See whether TD_DEATH is in any of the mask. */
|
||
const int idx = __td_eventword (TD_DEATH);
|
||
const uint32_t mask = __td_eventmask (TD_DEATH);
|
||
|
||
if ((mask & (__nptl_threads_events.event_bits[idx]
|
||
| pd->eventbuf.eventmask.event_bits[idx])) != 0)
|
||
{
|
||
/* Yep, we have to signal the death. Add the descriptor to
|
||
the list but only if it is not already on it. */
|
||
if (pd->nextevent == NULL)
|
||
{
|
||
pd->eventbuf.eventnum = TD_DEATH;
|
||
pd->eventbuf.eventdata = pd;
|
||
|
||
do
|
||
pd->nextevent = __nptl_last_event;
|
||
while (atomic_compare_and_exchange_bool_acq (&__nptl_last_event,
|
||
pd, pd->nextevent));
|
||
}
|
||
|
||
/* Now call the function which signals the event. See
|
||
CONCURRENCY NOTES for the nptl_db interface comments. */
|
||
__nptl_death_event ();
|
||
}
|
||
}
|
||
|
||
/* The thread is exiting now. Don't set this bit until after we've hit
|
||
the event-reporting breakpoint, so that td_thr_get_info on us while at
|
||
the breakpoint reports TD_THR_RUN state rather than TD_THR_ZOMBIE. */
|
||
atomic_fetch_or_relaxed (&pd->cancelhandling, EXITING_BITMASK);
|
||
|
||
if (__glibc_unlikely (atomic_fetch_add_relaxed (&__nptl_nthreads, -1) == 1))
|
||
/* This was the last thread. */
|
||
exit (0);
|
||
|
||
/* This prevents sending a signal from this thread to itself during
|
||
its final stages. This must come after the exit call above
|
||
because atexit handlers must not run with signals blocked.
|
||
|
||
Do not block SIGSETXID. The setxid handshake below expects the
|
||
signal to be delivered. (SIGSETXID cannot run application code,
|
||
nor does it use pthread_kill.) Reuse the pd->sigmask space for
|
||
computing the signal mask, to save stack space. */
|
||
internal_sigfillset (&pd->sigmask);
|
||
internal_sigdelset (&pd->sigmask, SIGSETXID);
|
||
INTERNAL_SYSCALL_CALL (rt_sigprocmask, SIG_BLOCK, &pd->sigmask, NULL,
|
||
__NSIG_BYTES);
|
||
|
||
/* Tell __pthread_kill_internal that this thread is about to exit.
|
||
If there is a __pthread_kill_internal in progress, this delays
|
||
the thread exit until the signal has been queued by the kernel
|
||
(so that the TID used to send it remains valid). */
|
||
__libc_lock_lock (pd->exit_lock);
|
||
pd->exiting = true;
|
||
__libc_lock_unlock (pd->exit_lock);
|
||
|
||
#ifndef __ASSUME_SET_ROBUST_LIST
|
||
/* If this thread has any robust mutexes locked, handle them now. */
|
||
# if __PTHREAD_MUTEX_HAVE_PREV
|
||
void *robust = pd->robust_head.list;
|
||
# else
|
||
__pthread_slist_t *robust = pd->robust_list.__next;
|
||
# endif
|
||
/* We let the kernel do the notification if it is able to do so.
|
||
If we have to do it here there for sure are no PI mutexes involved
|
||
since the kernel support for them is even more recent. */
|
||
if (!__nptl_set_robust_list_avail
|
||
&& __builtin_expect (robust != (void *) &pd->robust_head, 0))
|
||
{
|
||
do
|
||
{
|
||
struct __pthread_mutex_s *this = (struct __pthread_mutex_s *)
|
||
((char *) robust - offsetof (struct __pthread_mutex_s,
|
||
__list.__next));
|
||
robust = *((void **) robust);
|
||
|
||
# if __PTHREAD_MUTEX_HAVE_PREV
|
||
this->__list.__prev = NULL;
|
||
# endif
|
||
this->__list.__next = NULL;
|
||
|
||
atomic_fetch_or_acquire (&this->__lock, FUTEX_OWNER_DIED);
|
||
futex_wake ((unsigned int *) &this->__lock, 1,
|
||
/* XYZ */ FUTEX_SHARED);
|
||
}
|
||
while (robust != (void *) &pd->robust_head);
|
||
}
|
||
#endif
|
||
|
||
/* Release the vDSO getrandom per-thread buffer with all signal blocked,
|
||
to avoid creating a new free-state block during thread release. */
|
||
__getrandom_vdso_release (pd);
|
||
|
||
if (!pd->user_stack)
|
||
advise_stack_range (pd->stackblock, pd->stackblock_size, (uintptr_t) pd,
|
||
pd->guardsize);
|
||
|
||
if (__glibc_unlikely (pd->cancelhandling & SETXID_BITMASK))
|
||
{
|
||
/* Some other thread might call any of the setXid functions and expect
|
||
us to reply. In this case wait until we did that. */
|
||
do
|
||
/* XXX This differs from the typical futex_wait_simple pattern in that
|
||
the futex_wait condition (setxid_futex) is different from the
|
||
condition used in the surrounding loop (cancelhandling). We need
|
||
to check and document why this is correct. */
|
||
futex_wait_simple (&pd->setxid_futex, 0, FUTEX_PRIVATE);
|
||
while (pd->cancelhandling & SETXID_BITMASK);
|
||
|
||
/* Reset the value so that the stack can be reused. */
|
||
pd->setxid_futex = 0;
|
||
}
|
||
|
||
/* If the thread is detached free the TCB. */
|
||
if (IS_DETACHED (pd))
|
||
/* Free the TCB. */
|
||
__nptl_free_tcb (pd);
|
||
|
||
/* Remove the associated name from the thread stack. */
|
||
name_stack_maps (pd, false);
|
||
|
||
out:
|
||
/* We cannot call '_exit' here. '_exit' will terminate the process.
|
||
|
||
The 'exit' implementation in the kernel will signal when the
|
||
process is really dead since 'clone' got passed the CLONE_CHILD_CLEARTID
|
||
flag. The 'tid' field in the TCB will be set to zero.
|
||
|
||
rseq TLS is still registered at this point. Rely on implicit
|
||
unregistration performed by the kernel on thread teardown. This is not a
|
||
problem because the rseq TLS lives on the stack, and the stack outlives
|
||
the thread. If TCB allocation is ever changed, additional steps may be
|
||
required, such as performing explicit rseq unregistration before
|
||
reclaiming the rseq TLS area memory. It is NOT sufficient to block
|
||
signals because the kernel may write to the rseq area even without
|
||
signals.
|
||
|
||
The exit code is zero since in case all threads exit by calling
|
||
'pthread_exit' the exit status must be 0 (zero). */
|
||
while (1)
|
||
INTERNAL_SYSCALL_CALL (exit, 0);
|
||
|
||
/* NOTREACHED */
|
||
}
|
||
|
||
|
||
/* Return true iff obliged to report TD_CREATE events. */
|
||
static bool
|
||
report_thread_creation (struct pthread *pd)
|
||
{
|
||
if (__glibc_unlikely (THREAD_GETMEM (THREAD_SELF, report_events)))
|
||
{
|
||
/* The parent thread is supposed to report events.
|
||
Check whether the TD_CREATE event is needed, too. */
|
||
const size_t idx = __td_eventword (TD_CREATE);
|
||
const uint32_t mask = __td_eventmask (TD_CREATE);
|
||
|
||
return ((mask & (__nptl_threads_events.event_bits[idx]
|
||
| pd->eventbuf.eventmask.event_bits[idx])) != 0);
|
||
}
|
||
return false;
|
||
}
|
||
|
||
|
||
int
|
||
__pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr,
|
||
void *(*start_routine) (void *), void *arg)
|
||
{
|
||
void *stackaddr = NULL;
|
||
size_t stacksize = 0;
|
||
|
||
/* Avoid a data race in the multi-threaded case, and call the
|
||
deferred initialization only once. */
|
||
if (__libc_single_threaded_internal)
|
||
{
|
||
late_init ();
|
||
__libc_single_threaded_internal = 0;
|
||
/* __libc_single_threaded can be accessed through copy relocations, so
|
||
it requires to update the external copy. */
|
||
__libc_single_threaded = 0;
|
||
}
|
||
|
||
const struct pthread_attr *iattr = (struct pthread_attr *) attr;
|
||
union pthread_attr_transparent default_attr;
|
||
bool destroy_default_attr = false;
|
||
bool c11 = (attr == ATTR_C11_THREAD);
|
||
if (iattr == NULL || c11)
|
||
{
|
||
int ret = __pthread_getattr_default_np (&default_attr.external);
|
||
if (ret != 0)
|
||
return ret;
|
||
destroy_default_attr = true;
|
||
iattr = &default_attr.internal;
|
||
}
|
||
|
||
struct pthread *pd = NULL;
|
||
int err = allocate_stack (iattr, &pd, &stackaddr, &stacksize);
|
||
int retval = 0;
|
||
|
||
if (__glibc_unlikely (err != 0))
|
||
/* Something went wrong. Maybe a parameter of the attributes is
|
||
invalid or we could not allocate memory. Note we have to
|
||
translate error codes. */
|
||
{
|
||
retval = err == ENOMEM ? EAGAIN : err;
|
||
goto out;
|
||
}
|
||
|
||
|
||
/* Initialize the TCB. All initializations with zero should be
|
||
performed in 'get_cached_stack'. This way we avoid doing this if
|
||
the stack freshly allocated with 'mmap'. */
|
||
|
||
#if TLS_TCB_AT_TP
|
||
/* Reference to the TCB itself. */
|
||
pd->header.self = pd;
|
||
|
||
/* Self-reference for TLS. */
|
||
pd->header.tcb = pd;
|
||
#endif
|
||
|
||
/* Store the address of the start routine and the parameter. Since
|
||
we do not start the function directly the stillborn thread will
|
||
get the information from its thread descriptor. */
|
||
pd->start_routine = start_routine;
|
||
pd->arg = arg;
|
||
pd->c11 = c11;
|
||
|
||
/* Copy the thread attribute flags. */
|
||
struct pthread *self = THREAD_SELF;
|
||
pd->flags = ((iattr->flags & ~(ATTR_FLAG_SCHED_SET | ATTR_FLAG_POLICY_SET))
|
||
| (self->flags & (ATTR_FLAG_SCHED_SET | ATTR_FLAG_POLICY_SET)));
|
||
|
||
/* Inherit rseq registration state. Without seccomp filters, rseq
|
||
registration will either always fail or always succeed. */
|
||
if ((int) THREAD_GETMEM_VOLATILE (self, rseq_area.cpu_id) >= 0)
|
||
pd->flags |= ATTR_FLAG_DO_RSEQ;
|
||
|
||
/* Initialize the field for the ID of the thread which is waiting
|
||
for us. This is a self-reference in case the thread is created
|
||
detached. */
|
||
pd->joinid = iattr->flags & ATTR_FLAG_DETACHSTATE ? pd : NULL;
|
||
|
||
/* The debug events are inherited from the parent. */
|
||
pd->eventbuf = self->eventbuf;
|
||
|
||
|
||
/* Copy the parent's scheduling parameters. The flags will say what
|
||
is valid and what is not. */
|
||
pd->schedpolicy = self->schedpolicy;
|
||
pd->schedparam = self->schedparam;
|
||
|
||
/* Copy the stack guard canary. */
|
||
#ifdef THREAD_COPY_STACK_GUARD
|
||
THREAD_COPY_STACK_GUARD (pd);
|
||
#endif
|
||
|
||
/* Copy the pointer guard value. */
|
||
#ifdef THREAD_COPY_POINTER_GUARD
|
||
THREAD_COPY_POINTER_GUARD (pd);
|
||
#endif
|
||
|
||
/* Setup tcbhead. */
|
||
tls_setup_tcbhead (pd);
|
||
|
||
/* Verify the sysinfo bits were copied in allocate_stack if needed. */
|
||
#ifdef NEED_DL_SYSINFO
|
||
CHECK_THREAD_SYSINFO (pd);
|
||
#endif
|
||
|
||
/* Determine scheduling parameters for the thread. */
|
||
if (__builtin_expect ((iattr->flags & ATTR_FLAG_NOTINHERITSCHED) != 0, 0)
|
||
&& (iattr->flags & (ATTR_FLAG_SCHED_SET | ATTR_FLAG_POLICY_SET)) != 0)
|
||
{
|
||
/* Use the scheduling parameters the user provided. */
|
||
if (iattr->flags & ATTR_FLAG_POLICY_SET)
|
||
{
|
||
pd->schedpolicy = iattr->schedpolicy;
|
||
pd->flags |= ATTR_FLAG_POLICY_SET;
|
||
}
|
||
if (iattr->flags & ATTR_FLAG_SCHED_SET)
|
||
{
|
||
/* The values were validated in pthread_attr_setschedparam. */
|
||
pd->schedparam = iattr->schedparam;
|
||
pd->flags |= ATTR_FLAG_SCHED_SET;
|
||
}
|
||
|
||
if ((pd->flags & (ATTR_FLAG_SCHED_SET | ATTR_FLAG_POLICY_SET))
|
||
!= (ATTR_FLAG_SCHED_SET | ATTR_FLAG_POLICY_SET))
|
||
collect_default_sched (pd);
|
||
}
|
||
|
||
if (__glibc_unlikely (__nptl_nthreads == 1))
|
||
_IO_enable_locks ();
|
||
|
||
/* Pass the descriptor to the caller. */
|
||
*newthread = (pthread_t) pd;
|
||
|
||
LIBC_PROBE (pthread_create, 4, newthread, attr, start_routine, arg);
|
||
|
||
/* One more thread. We cannot have the thread do this itself, since it
|
||
might exist but not have been scheduled yet by the time we've returned
|
||
and need to check the value to behave correctly. We must do it before
|
||
creating the thread, in case it does get scheduled first and then
|
||
might mistakenly think it was the only thread. In the failure case,
|
||
we momentarily store a false value; this doesn't matter because there
|
||
is no kosher thing a signal handler interrupting us right here can do
|
||
that cares whether the thread count is correct. */
|
||
atomic_fetch_add_relaxed (&__nptl_nthreads, 1);
|
||
|
||
/* Our local value of stopped_start and thread_ran can be accessed at
|
||
any time. The PD->stopped_start may only be accessed if we have
|
||
ownership of PD (see CONCURRENCY NOTES above). */
|
||
bool stopped_start = false; bool thread_ran = false;
|
||
|
||
/* Block all signals, so that the new thread starts out with
|
||
signals disabled. This avoids race conditions in the thread
|
||
startup. */
|
||
internal_sigset_t original_sigmask;
|
||
internal_signal_block_all (&original_sigmask);
|
||
|
||
if (iattr->extension != NULL && iattr->extension->sigmask_set)
|
||
/* Use the signal mask in the attribute. The internal signals
|
||
have already been filtered by the public
|
||
pthread_attr_setsigmask_np interface. */
|
||
internal_sigset_from_sigset (&pd->sigmask, &iattr->extension->sigmask);
|
||
else
|
||
{
|
||
/* Conceptually, the new thread needs to inherit the signal mask
|
||
of this thread. Therefore, it needs to restore the saved
|
||
signal mask of this thread, so save it in the startup
|
||
information. */
|
||
pd->sigmask = original_sigmask;
|
||
/* Reset the cancellation signal mask in case this thread is
|
||
running cancellation. */
|
||
internal_sigdelset (&pd->sigmask, SIGCANCEL);
|
||
}
|
||
|
||
/* Start the thread. */
|
||
if (__glibc_unlikely (report_thread_creation (pd)))
|
||
{
|
||
stopped_start = true;
|
||
|
||
/* We always create the thread stopped at startup so we can
|
||
notify the debugger. */
|
||
retval = create_thread (pd, iattr, &stopped_start, stackaddr,
|
||
stacksize, &thread_ran);
|
||
if (retval == 0)
|
||
{
|
||
/* We retain ownership of PD until (a) (see CONCURRENCY NOTES
|
||
above). */
|
||
|
||
/* Assert stopped_start is true in both our local copy and the
|
||
PD copy. */
|
||
assert (stopped_start);
|
||
assert (pd->stopped_start);
|
||
|
||
/* Now fill in the information about the new thread in
|
||
the newly created thread's data structure. We cannot let
|
||
the new thread do this since we don't know whether it was
|
||
already scheduled when we send the event. */
|
||
pd->eventbuf.eventnum = TD_CREATE;
|
||
pd->eventbuf.eventdata = pd;
|
||
|
||
/* Enqueue the descriptor. */
|
||
do
|
||
pd->nextevent = __nptl_last_event;
|
||
while (atomic_compare_and_exchange_bool_acq (&__nptl_last_event,
|
||
pd, pd->nextevent)
|
||
!= 0);
|
||
|
||
/* Now call the function which signals the event. See
|
||
CONCURRENCY NOTES for the nptl_db interface comments. */
|
||
__nptl_create_event ();
|
||
}
|
||
}
|
||
else
|
||
retval = create_thread (pd, iattr, &stopped_start, stackaddr,
|
||
stacksize, &thread_ran);
|
||
|
||
/* Return to the previous signal mask, after creating the new
|
||
thread. */
|
||
internal_signal_restore_set (&original_sigmask);
|
||
|
||
if (__glibc_unlikely (retval != 0))
|
||
{
|
||
if (thread_ran)
|
||
/* State (c) and we do not have PD ownership (see CONCURRENCY NOTES
|
||
above). We can assert that STOPPED_START must have been true
|
||
because thread creation didn't fail, but thread attribute setting
|
||
did. */
|
||
{
|
||
assert (stopped_start);
|
||
/* Signal the created thread to release PD ownership and early
|
||
exit so it could be joined. */
|
||
pd->setup_failed = 1;
|
||
lll_unlock (pd->lock, LLL_PRIVATE);
|
||
|
||
/* Similar to pthread_join, but since thread creation has failed at
|
||
startup there is no need to handle all the steps. */
|
||
pid_t tid;
|
||
while ((tid = atomic_load_acquire (&pd->tid)) != 0)
|
||
__futex_abstimed_wait_cancelable64 ((unsigned int *) &pd->tid,
|
||
tid, 0, NULL, LLL_SHARED);
|
||
}
|
||
|
||
/* State (c) or (d) and we have ownership of PD (see CONCURRENCY
|
||
NOTES above). */
|
||
|
||
/* Oops, we lied for a second. */
|
||
atomic_fetch_add_relaxed (&__nptl_nthreads, -1);
|
||
|
||
/* Free the resources. */
|
||
__nptl_deallocate_stack (pd);
|
||
|
||
/* We have to translate error codes. */
|
||
if (retval == ENOMEM)
|
||
retval = EAGAIN;
|
||
}
|
||
else
|
||
{
|
||
/* We don't know if we have PD ownership. Once we check the local
|
||
stopped_start we'll know if we're in state (a) or (b) (see
|
||
CONCURRENCY NOTES above). */
|
||
if (stopped_start)
|
||
/* State (a), we own PD. The thread blocked on this lock either
|
||
because we're doing TD_CREATE event reporting, or for some
|
||
other reason that create_thread chose. Now let it run
|
||
free. */
|
||
lll_unlock (pd->lock, LLL_PRIVATE);
|
||
|
||
/* We now have for sure more than one thread. The main thread might
|
||
not yet have the flag set. No need to set the global variable
|
||
again if this is what we use. */
|
||
THREAD_SETMEM (THREAD_SELF, header.multiple_threads, 1);
|
||
}
|
||
|
||
out:
|
||
if (destroy_default_attr)
|
||
__pthread_attr_destroy (&default_attr.external);
|
||
|
||
return retval;
|
||
}
|
||
/* Symbol bindings for __pthread_create_2_1.
   NOTE(review): the macros used below are defined outside this chunk;
   the per-line descriptions are assumptions based on their names and
   arguments and should be confirmed against the macro definitions.  */
/* Presumably exports __pthread_create_2_1 from libc as the default
   pthread_create symbol at version GLIBC_2_34 -- TODO confirm.  */
versioned_symbol (libc, __pthread_create_2_1, pthread_create, GLIBC_2_34);
|
||
/* Presumably provides __pthread_create as a library-internal (hidden)
   name for the same implementation -- TODO confirm.  */
libc_hidden_ver (__pthread_create_2_1, __pthread_create)
|
||
/* Only when SHARED is not defined: make __pthread_create an alias of
   __pthread_create_2_1.  */
#ifndef SHARED
|
||
strong_alias (__pthread_create_2_1, __pthread_create)
|
||
#endif
|
||
|
||
/* Presumably keeps the pthread_create symbol at version GLIBC_2_1 under
   the libpthread name available for already-linked binaries; guarded by
   the GLIBC_2_1 .. GLIBC_2_34 compatibility range -- TODO confirm.  */
#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_1, GLIBC_2_34)
|
||
compat_symbol (libpthread, __pthread_create_2_1, pthread_create, GLIBC_2_1);
|
||
#endif
|
||
|
||
#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_0, GLIBC_2_1)
|
||
int
|
||
__pthread_create_2_0 (pthread_t *newthread, const pthread_attr_t *attr,
|
||
void *(*start_routine) (void *), void *arg)
|
||
{
|
||
/* The ATTR attribute is not really of type `pthread_attr_t *'. It has
|
||
the old size and access to the new members might crash the program.
|
||
We convert the struct now. */
|
||
struct pthread_attr new_attr;
|
||
|
||
if (attr != NULL)
|
||
{
|
||
struct pthread_attr *iattr = (struct pthread_attr *) attr;
|
||
size_t ps = __getpagesize ();
|
||
|
||
/* Copy values from the user-provided attributes. */
|
||
new_attr.schedparam = iattr->schedparam;
|
||
new_attr.schedpolicy = iattr->schedpolicy;
|
||
new_attr.flags = iattr->flags;
|
||
|
||
/* Fill in default values for the fields not present in the old
|
||
implementation. */
|
||
new_attr.guardsize = ps;
|
||
new_attr.stackaddr = NULL;
|
||
new_attr.stacksize = 0;
|
||
new_attr.extension = NULL;
|
||
|
||
/* We will pass this value on to the real implementation. */
|
||
attr = (pthread_attr_t *) &new_attr;
|
||
}
|
||
|
||
return __pthread_create_2_1 (newthread, attr, start_routine, arg);
|
||
}
|
||
/* NOTE(review): compat_symbol is defined outside this chunk.  Presumably
   this binds __pthread_create_2_0 to the pthread_create symbol at version
   GLIBC_2_0 under the libpthread name, so that binaries built against the
   old attribute layout keep reaching the converting wrapper -- confirm
   against the macro definition.  */
compat_symbol (libpthread, __pthread_create_2_0, pthread_create,
|
||
GLIBC_2_0);
|
||
#endif
|
||
|
||
/* Information for libthread_db. */
|
||
|
||
#include "../nptl_db/db_info.c"
|
||
|
||
/* If pthread_create is present, libgcc_eh.a and libsupc++.a expect some other POSIX thread
|
||
functions to be present as well. */
|
||
PTHREAD_STATIC_FN_REQUIRE (__pthread_mutex_lock)
|
||
PTHREAD_STATIC_FN_REQUIRE (__pthread_mutex_trylock)
|
||
PTHREAD_STATIC_FN_REQUIRE (__pthread_mutex_unlock)
|
||
|
||
PTHREAD_STATIC_FN_REQUIRE (__pthread_once)
|
||
PTHREAD_STATIC_FN_REQUIRE (__pthread_cancel)
|
||
|
||
PTHREAD_STATIC_FN_REQUIRE (__pthread_key_create)
|
||
PTHREAD_STATIC_FN_REQUIRE (__pthread_key_delete)
|
||
PTHREAD_STATIC_FN_REQUIRE (__pthread_setspecific)
|
||
PTHREAD_STATIC_FN_REQUIRE (__pthread_getspecific)
|