mirror of
https://sourceware.org/git/glibc.git
synced 2024-12-23 19:30:10 +00:00
c579f48edb
This patch removes the PID cache and its usage from the current GLIBC code. The cache is mainly used as a performance optimization to avoid the syscall; however, it adds some issues: - The exposed clone syscall will try to set pid/tid to make the new thread somewhat compatible with current GLIBC assumptions. This causes a set of issues with new workloads and use cases (such as BZ#17214 and [1]) as well as for new internal usage of clone to optimize other algorithms (such as clone plus CLONE_VM for posix_spawn, BZ#19957). - The caching complexity also added some bugs in the past [2] [3] and requires more effort from each port to handle such requirements (for both the clone and vfork implementations). - The caching performance gain is mainly on getpid and some specific code paths. The getpid performance leverage is questionable [4], both regarding the idea of getpid being a hotspot and regarding the getpid implementation itself (if it is indeed a justifiable hotspot, a vDSO symbol could lead to a much simpler solution). Other usage is mainly for unusual code paths, such as pthread cancellation signal and handling. For thread creation (on stack allocation) the code simplification in fact adds some performance gain, since there is no need to traverse the stack cache and invalidate each element's pid. Other thread usages will require a direct getpid syscall, such as cancellation/setxid signal, thread cancellation, thread fail path (at create_thread), and thread signal (pthread_kill and pthread_sigqueue). However these are hardly usual hotspots and I think adding a syscall is justifiable. It also simplifies both the clone and vfork arch-specific implementations. And by reviewing each fork implementation I found some discrepancies that this patch also solves: - microblaze clone/vfork does not set/reset the pid/tid field - hppa uses the default vfork implementation that falls back to fork. Since vfork is deprecated I do not think we should bother with it. The patch also removes the TID caching in clone. 
My understanding is that this semantic tries to provide some pthread usability after a user program issues clone directly (as done by thread creation with CLONE_PARENT_SETTID and the pthread tid member). However, as stated before in multiple discussion threads, GLIBC provides the clone syscall without further supporting all these semantics. I ran a full make check on x86_64, x32, i686, armhf, aarch64, and powerpc64le. For sparc32, sparc64, and mips I ran the basic fork and vfork tests from posix/ folder (on a qemu system). So it would require further testing on alpha, hppa, ia64, m68k, nios2, s390, sh, and tile (I excluded microblaze because it is already implementing the patch semantic regarding clone/vfork). [1] https://codereview.chromium.org/800183004/ [2] https://sourceware.org/ml/libc-alpha/2006-07/msg00123.html [3] https://sourceware.org/bugzilla/show_bug.cgi?id=15368 [4] http://yarchive.net/comp/linux/getpid_caching.html * sysdeps/nptl/fork.c (__libc_fork): Remove pid cache setting. * nptl/allocatestack.c (allocate_stack): Likewise. (__reclaim_stacks): Likewise. (setxid_signal_thread): Obtain pid through syscall. * nptl/nptl-init.c (sigcancel_handler): Likewise. (sighandle_setxid): Likewise. * nptl/pthread_cancel.c (pthread_cancel): Likewise. * sysdeps/unix/sysv/linux/pthread_kill.c (__pthread_kill): Likewise. * sysdeps/unix/sysv/linux/pthread_sigqueue.c (pthread_sigqueue): Likewise. * sysdeps/unix/sysv/linux/createthread.c (create_thread): Likewise. * sysdeps/unix/sysv/linux/getpid.c: Remove file. * nptl/descr.h (struct pthread): Change comment about pid value. * nptl/pthread_getattr_np.c (pthread_getattr_np): Remove thread pid assert. * sysdeps/unix/sysv/linux/pthread-pids.h (__pthread_initialize_pids): Do not set pid value. * nptl_db/td_ta_thr_iter.c (iterate_thread_list): Remove thread pid cache check. * nptl_db/td_thr_validate.c (td_thr_validate): Likewise. * sysdeps/aarch64/nptl/tcb-offsets.sym: Remove pid offset. * sysdeps/alpha/nptl/tcb-offsets.sym: Likewise. 
* sysdeps/arm/nptl/tcb-offsets.sym: Likewise. * sysdeps/hppa/nptl/tcb-offsets.sym: Likewise. * sysdeps/i386/nptl/tcb-offsets.sym: Likewise. * sysdeps/ia64/nptl/tcb-offsets.sym: Likewise. * sysdeps/m68k/nptl/tcb-offsets.sym: Likewise. * sysdeps/microblaze/nptl/tcb-offsets.sym: Likewise. * sysdeps/mips/nptl/tcb-offsets.sym: Likewise. * sysdeps/nios2/nptl/tcb-offsets.sym: Likewise. * sysdeps/powerpc/nptl/tcb-offsets.sym: Likewise. * sysdeps/s390/nptl/tcb-offsets.sym: Likewise. * sysdeps/sh/nptl/tcb-offsets.sym: Likewise. * sysdeps/sparc/nptl/tcb-offsets.sym: Likewise. * sysdeps/tile/nptl/tcb-offsets.sym: Likewise. * sysdeps/x86_64/nptl/tcb-offsets.sym: Likewise. * sysdeps/unix/sysv/linux/aarch64/clone.S: Remove pid and tid caching. * sysdeps/unix/sysv/linux/alpha/clone.S: Likewise. * sysdeps/unix/sysv/linux/arm/clone.S: Likewise. * sysdeps/unix/sysv/linux/hppa/clone.S: Likewise. * sysdeps/unix/sysv/linux/i386/clone.S: Likewise. * sysdeps/unix/sysv/linux/ia64/clone2.S: Likewise. * sysdeps/unix/sysv/linux/mips/clone.S: Likewise. * sysdeps/unix/sysv/linux/nios2/clone.S: Likewise. * sysdeps/unix/sysv/linux/powerpc/powerpc32/clone.S: Likewise. * sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S: Likewise. * sysdeps/unix/sysv/linux/s390/s390-32/clone.S: Likewise. * sysdeps/unix/sysv/linux/s390/s390-64/clone.S: Likewise. * sysdeps/unix/sysv/linux/sh/clone.S: Likewise. * sysdeps/unix/sysv/linux/sparc/sparc32/clone.S: Likewise. * sysdeps/unix/sysv/linux/sparc/sparc64/clone.S: Likewise. * sysdeps/unix/sysv/linux/tile/clone.S: Likewise. * sysdeps/unix/sysv/linux/x86_64/clone.S: Likewise. * sysdeps/unix/sysv/linux/aarch64/vfork.S: Remove pid set and reset. * sysdeps/unix/sysv/linux/alpha/vfork.S: Likewise. * sysdeps/unix/sysv/linux/arm/vfork.S: Likewise. * sysdeps/unix/sysv/linux/i386/vfork.S: Likewise. * sysdeps/unix/sysv/linux/ia64/vfork.S: Likewise. * sysdeps/unix/sysv/linux/m68k/clone.S: Likewise. * sysdeps/unix/sysv/linux/m68k/vfork.S: Likewise. 
* sysdeps/unix/sysv/linux/mips/vfork.S: Likewise. * sysdeps/unix/sysv/linux/nios2/vfork.S: Likewise. * sysdeps/unix/sysv/linux/powerpc/powerpc32/vfork.S: Likewise. * sysdeps/unix/sysv/linux/powerpc/powerpc64/vfork.S: Likewise. * sysdeps/unix/sysv/linux/s390/s390-32/vfork.S: Likewise. * sysdeps/unix/sysv/linux/s390/s390-64/vfork.S: Likewise. * sysdeps/unix/sysv/linux/sh/vfork.S: Likewise. * sysdeps/unix/sysv/linux/sparc/sparc32/vfork.S: Likewise. * sysdeps/unix/sysv/linux/sparc/sparc64/vfork.S: Likewise. * sysdeps/unix/sysv/linux/tile/vfork.S: Likewise. * sysdeps/unix/sysv/linux/x86_64/vfork.S: Likewise. * sysdeps/unix/sysv/linux/tst-clone2.c (f): Remove direct pthread struct access. (clone_test): Remove function. (do_test): Rewrite to take in consideration pid is not cached anymore.
390 lines
13 KiB
C
390 lines
13 KiB
C
/* Copyright (C) 2002-2016 Free Software Foundation, Inc.
|
|
This file is part of the GNU C Library.
|
|
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with the GNU C Library; if not, see
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
#ifndef _DESCR_H
|
|
#define _DESCR_H 1
|
|
|
|
#include <limits.h>
|
|
#include <sched.h>
|
|
#include <setjmp.h>
|
|
#include <stdbool.h>
|
|
#include <sys/types.h>
|
|
#include <hp-timing.h>
|
|
#define __need_list_t
|
|
#include <list.h>
|
|
#include <lowlevellock.h>
|
|
#include <pthreaddef.h>
|
|
#include <dl-sysdep.h>
|
|
#include "../nptl_db/thread_db.h"
|
|
#include <tls.h>
|
|
#include <unwind.h>
|
|
#define __need_res_state
|
|
#include <resolv.h>
|
|
#include <kernel-features.h>
|
|
|
|
/* Fallback alignment of the thread descriptor.  It is only used when
   none of the headers included above has already defined TCB_ALIGNMENT;
   struct pthread below is declared with this alignment.  The expansion
   is parenthesized so it can be used safely inside larger expressions.  */
#ifndef TCB_ALIGNMENT
# define TCB_ALIGNMENT	(sizeof (double))
#endif
|
|
|
|
|
|
/* We keep thread specific data in a special data structure, a two-level
   array.  The top-level array contains pointers to dynamically allocated
   arrays of a certain number of data pointers.  So we can implement a
   sparse array.  Each dynamic second-level array has
	PTHREAD_KEY_2NDLEVEL_SIZE
   entries.  This value shouldn't be too large.  */
#define PTHREAD_KEY_2NDLEVEL_SIZE	32

/* We need to address PTHREAD_KEYS_MAX keys with PTHREAD_KEY_2NDLEVEL_SIZE
   keys in each subarray.  The division rounds up, so a partially filled
   last subarray is still counted.  */
#define PTHREAD_KEY_1STLEVEL_SIZE \
  ((PTHREAD_KEYS_MAX + PTHREAD_KEY_2NDLEVEL_SIZE - 1) \
   / PTHREAD_KEY_2NDLEVEL_SIZE)
|
|
|
|
|
|
|
|
|
|
/* Internal version of the buffer to store cancellation handler
   information.  */
struct pthread_unwind_buf
{
  /* Saved jump context plus a flag saying whether a signal mask was
     saved along with it.  Declared as a one-element array.  */
  struct
  {
    __jmp_buf jmp_buf;
    int mask_was_saved;
  } cancel_jmp_buf[1];

  union
  {
    /* This is the placeholder of the public version.  It reserves room
       for four pointers; the private view below shares that storage.  */
    void *pad[4];

    struct
    {
      /* Pointer to the previous cleanup buffer.  */
      struct pthread_unwind_buf *prev;

      /* Backward compatibility: state of the old-style cleanup
	 handler at the time of the previous new-style cleanup handler
	 installment.  */
      struct _pthread_cleanup_buffer *cleanup;

      /* Cancellation type before the push call.  */
      int canceltype;
    } data;
  } priv;
};
|
|
|
|
|
|
/* Opcodes and data types for communication with the signal handler to
   change user/group IDs.  */
struct xid_command
{
  /* NOTE(review): presumably the number of the set*id system call to
     perform -- confirm against the setxid code.  */
  int syscall_no;
  /* Room for up to three ID values (presumably the syscall arguments).  */
  long int id[3];
  /* NOTE(review): looks like a counter shared with the signal handler;
     its exact meaning is not visible in this header.  */
  volatile int cntr;
  volatile int error; /* -1: no call yet, 0: success seen, >0: error seen.  */
};
|
|
|
|
|
|
/* Data structure used by the kernel to find robust futexes.  */
struct robust_list_head
{
  /* Head of the list.  The ENQUEUE_MUTEX_* macros in struct pthread
     store here the address of the most recently enqueued mutex's list
     link, with the lowest bit used as a tag (set by ENQUEUE_MUTEX_PI).  */
  void *list;
  /* NOTE(review): presumably the offset from a list entry to its futex
     word -- confirm against the kernel robust-futex ABI.  */
  long int futex_offset;
  /* NOTE(review): presumably the entry currently being locked/unlocked;
     not set anywhere in this header.  */
  void *list_op_pending;
};
|
|
|
|
|
|
/* Data structure used to handle thread priority protection.  */
struct priority_protection_data
{
  /* NOTE(review): presumably the highest priority currently recorded
     in PRIOMAP -- confirm against the tpp code.  */
  int priomax;
  /* Flexible array member; its length is chosen by whoever allocates
     the structure.  */
  unsigned int priomap[];
};
|
|
|
|
|
|
/* Thread descriptor data structure. */
|
|
struct pthread
|
|
{
|
|
union
|
|
{
|
|
#if !TLS_DTV_AT_TP
|
|
/* This overlaps the TCB as used for TLS without threads (see tls.h). */
|
|
tcbhead_t header;
|
|
#else
|
|
struct
|
|
{
|
|
/* multiple_threads is enabled either when the process has spawned at
|
|
least one thread or when a single-threaded process cancels itself.
|
|
This enables additional code to introduce locking before doing some
|
|
compare_and_exchange operations and also enable cancellation points.
|
|
The concepts of multiple threads and cancellation points ideally
|
|
should be separate, since it is not necessary for multiple threads to
|
|
have been created for cancellation points to be enabled, as is the
|
|
case is when single-threaded process cancels itself.
|
|
|
|
Since enabling multiple_threads enables additional code in
|
|
cancellation points and compare_and_exchange operations, there is a
|
|
potential for an unneeded performance hit when it is enabled in a
|
|
single-threaded, self-canceling process. This is OK though, since a
|
|
single-threaded process will enable async cancellation only when it
|
|
looks to cancel itself and is hence going to end anyway. */
|
|
int multiple_threads;
|
|
int gscope_flag;
|
|
# ifndef __ASSUME_PRIVATE_FUTEX
|
|
int private_futex;
|
|
# endif
|
|
} header;
|
|
#endif
|
|
|
|
/* This extra padding has no special purpose, and this structure layout
|
|
is private and subject to change without affecting the official ABI.
|
|
We just have it here in case it might be convenient for some
|
|
implementation-specific instrumentation hack or suchlike. */
|
|
void *__padding[24];
|
|
};
|
|
|
|
/* This descriptor's link on the `stack_used' or `__stack_user' list. */
|
|
list_t list;
|
|
|
|
/* Thread ID - which is also a 'is this thread descriptor (and
|
|
therefore stack) used' flag. */
|
|
pid_t tid;
|
|
|
|
/* Ununsed. */
|
|
pid_t pid_ununsed;
|
|
|
|
/* List of robust mutexes the thread is holding. */
|
|
#ifdef __PTHREAD_MUTEX_HAVE_PREV
|
|
void *robust_prev;
|
|
struct robust_list_head robust_head;
|
|
|
|
/* The list above is strange. It is basically a double linked list
|
|
but the pointer to the next/previous element of the list points
|
|
in the middle of the object, the __next element. Whenever
|
|
casting to __pthread_list_t we need to adjust the pointer
|
|
first. */
|
|
# define QUEUE_PTR_ADJUST (offsetof (__pthread_list_t, __next))
|
|
|
|
# define ENQUEUE_MUTEX_BOTH(mutex, val) \
|
|
do { \
|
|
__pthread_list_t *next = (__pthread_list_t *) \
|
|
((((uintptr_t) THREAD_GETMEM (THREAD_SELF, robust_head.list)) & ~1ul) \
|
|
- QUEUE_PTR_ADJUST); \
|
|
next->__prev = (void *) &mutex->__data.__list.__next; \
|
|
mutex->__data.__list.__next = THREAD_GETMEM (THREAD_SELF, \
|
|
robust_head.list); \
|
|
mutex->__data.__list.__prev = (void *) &THREAD_SELF->robust_head; \
|
|
THREAD_SETMEM (THREAD_SELF, robust_head.list, \
|
|
(void *) (((uintptr_t) &mutex->__data.__list.__next) \
|
|
| val)); \
|
|
} while (0)
|
|
# define DEQUEUE_MUTEX(mutex) \
|
|
do { \
|
|
__pthread_list_t *next = (__pthread_list_t *) \
|
|
((char *) (((uintptr_t) mutex->__data.__list.__next) & ~1ul) \
|
|
- QUEUE_PTR_ADJUST); \
|
|
next->__prev = mutex->__data.__list.__prev; \
|
|
__pthread_list_t *prev = (__pthread_list_t *) \
|
|
((char *) (((uintptr_t) mutex->__data.__list.__prev) & ~1ul) \
|
|
- QUEUE_PTR_ADJUST); \
|
|
prev->__next = mutex->__data.__list.__next; \
|
|
mutex->__data.__list.__prev = NULL; \
|
|
mutex->__data.__list.__next = NULL; \
|
|
} while (0)
|
|
#else
|
|
union
|
|
{
|
|
__pthread_slist_t robust_list;
|
|
struct robust_list_head robust_head;
|
|
};
|
|
|
|
# define ENQUEUE_MUTEX_BOTH(mutex, val) \
|
|
do { \
|
|
mutex->__data.__list.__next \
|
|
= THREAD_GETMEM (THREAD_SELF, robust_list.__next); \
|
|
THREAD_SETMEM (THREAD_SELF, robust_list.__next, \
|
|
(void *) (((uintptr_t) &mutex->__data.__list) | val)); \
|
|
} while (0)
|
|
# define DEQUEUE_MUTEX(mutex) \
|
|
do { \
|
|
__pthread_slist_t *runp = (__pthread_slist_t *) \
|
|
(((uintptr_t) THREAD_GETMEM (THREAD_SELF, robust_list.__next)) & ~1ul); \
|
|
if (runp == &mutex->__data.__list) \
|
|
THREAD_SETMEM (THREAD_SELF, robust_list.__next, runp->__next); \
|
|
else \
|
|
{ \
|
|
__pthread_slist_t *next = (__pthread_slist_t *) \
|
|
(((uintptr_t) runp->__next) & ~1ul); \
|
|
while (next != &mutex->__data.__list) \
|
|
{ \
|
|
runp = next; \
|
|
next = (__pthread_slist_t *) (((uintptr_t) runp->__next) & ~1ul); \
|
|
} \
|
|
\
|
|
runp->__next = next->__next; \
|
|
mutex->__data.__list.__next = NULL; \
|
|
} \
|
|
} while (0)
|
|
#endif
|
|
#define ENQUEUE_MUTEX(mutex) ENQUEUE_MUTEX_BOTH (mutex, 0)
|
|
#define ENQUEUE_MUTEX_PI(mutex) ENQUEUE_MUTEX_BOTH (mutex, 1)
|
|
|
|
/* List of cleanup buffers. */
|
|
struct _pthread_cleanup_buffer *cleanup;
|
|
|
|
/* Unwind information. */
|
|
struct pthread_unwind_buf *cleanup_jmp_buf;
|
|
#define HAVE_CLEANUP_JMP_BUF
|
|
|
|
/* Flags determining processing of cancellation. */
|
|
int cancelhandling;
|
|
/* Bit set if cancellation is disabled. */
|
|
#define CANCELSTATE_BIT 0
|
|
#define CANCELSTATE_BITMASK (0x01 << CANCELSTATE_BIT)
|
|
/* Bit set if asynchronous cancellation mode is selected. */
|
|
#define CANCELTYPE_BIT 1
|
|
#define CANCELTYPE_BITMASK (0x01 << CANCELTYPE_BIT)
|
|
/* Bit set if canceling has been initiated. */
|
|
#define CANCELING_BIT 2
|
|
#define CANCELING_BITMASK (0x01 << CANCELING_BIT)
|
|
/* Bit set if canceled. */
|
|
#define CANCELED_BIT 3
|
|
#define CANCELED_BITMASK (0x01 << CANCELED_BIT)
|
|
/* Bit set if thread is exiting. */
|
|
#define EXITING_BIT 4
|
|
#define EXITING_BITMASK (0x01 << EXITING_BIT)
|
|
/* Bit set if thread terminated and TCB is freed. */
|
|
#define TERMINATED_BIT 5
|
|
#define TERMINATED_BITMASK (0x01 << TERMINATED_BIT)
|
|
/* Bit set if thread is supposed to change XID. */
|
|
#define SETXID_BIT 6
|
|
#define SETXID_BITMASK (0x01 << SETXID_BIT)
|
|
/* Mask for the rest. Helps the compiler to optimize. */
|
|
#define CANCEL_RESTMASK 0xffffff80
|
|
|
|
#define CANCEL_ENABLED_AND_CANCELED(value) \
|
|
(((value) & (CANCELSTATE_BITMASK | CANCELED_BITMASK | EXITING_BITMASK \
|
|
| CANCEL_RESTMASK | TERMINATED_BITMASK)) == CANCELED_BITMASK)
|
|
#define CANCEL_ENABLED_AND_CANCELED_AND_ASYNCHRONOUS(value) \
|
|
(((value) & (CANCELSTATE_BITMASK | CANCELTYPE_BITMASK | CANCELED_BITMASK \
|
|
| EXITING_BITMASK | CANCEL_RESTMASK | TERMINATED_BITMASK)) \
|
|
== (CANCELTYPE_BITMASK | CANCELED_BITMASK))
|
|
|
|
/* Flags. Including those copied from the thread attribute. */
|
|
int flags;
|
|
|
|
/* We allocate one block of references here. This should be enough
|
|
to avoid allocating any memory dynamically for most applications. */
|
|
struct pthread_key_data
|
|
{
|
|
/* Sequence number. We use uintptr_t to not require padding on
|
|
32- and 64-bit machines. On 64-bit machines it helps to avoid
|
|
wrapping, too. */
|
|
uintptr_t seq;
|
|
|
|
/* Data pointer. */
|
|
void *data;
|
|
} specific_1stblock[PTHREAD_KEY_2NDLEVEL_SIZE];
|
|
|
|
/* Two-level array for the thread-specific data. */
|
|
struct pthread_key_data *specific[PTHREAD_KEY_1STLEVEL_SIZE];
|
|
|
|
/* Flag which is set when specific data is set. */
|
|
bool specific_used;
|
|
|
|
/* True if events must be reported. */
|
|
bool report_events;
|
|
|
|
/* True if the user provided the stack. */
|
|
bool user_stack;
|
|
|
|
/* True if thread must stop at startup time. */
|
|
bool stopped_start;
|
|
|
|
/* The parent's cancel handling at the time of the pthread_create
|
|
call. This might be needed to undo the effects of a cancellation. */
|
|
int parent_cancelhandling;
|
|
|
|
/* Lock to synchronize access to the descriptor. */
|
|
int lock;
|
|
|
|
/* Lock for synchronizing setxid calls. */
|
|
unsigned int setxid_futex;
|
|
|
|
#if HP_TIMING_AVAIL
|
|
/* Offset of the CPU clock at start thread start time. */
|
|
hp_timing_t cpuclock_offset;
|
|
#endif
|
|
|
|
/* If the thread waits to join another one the ID of the latter is
|
|
stored here.
|
|
|
|
In case a thread is detached this field contains a pointer of the
|
|
TCB if the thread itself. This is something which cannot happen
|
|
in normal operation. */
|
|
struct pthread *joinid;
|
|
/* Check whether a thread is detached. */
|
|
#define IS_DETACHED(pd) ((pd)->joinid == (pd))
|
|
|
|
/* The result of the thread function. */
|
|
void *result;
|
|
|
|
/* Scheduling parameters for the new thread. */
|
|
struct sched_param schedparam;
|
|
int schedpolicy;
|
|
|
|
/* Start position of the code to be executed and the argument passed
|
|
to the function. */
|
|
void *(*start_routine) (void *);
|
|
void *arg;
|
|
|
|
/* Debug state. */
|
|
td_eventbuf_t eventbuf;
|
|
/* Next descriptor with a pending event. */
|
|
struct pthread *nextevent;
|
|
|
|
/* Machine-specific unwind info. */
|
|
struct _Unwind_Exception exc;
|
|
|
|
/* If nonzero pointer to area allocated for the stack and its
|
|
size. */
|
|
void *stackblock;
|
|
size_t stackblock_size;
|
|
/* Size of the included guard area. */
|
|
size_t guardsize;
|
|
/* This is what the user specified and what we will report. */
|
|
size_t reported_guardsize;
|
|
|
|
/* Thread Priority Protection data. */
|
|
struct priority_protection_data *tpp;
|
|
|
|
/* Resolver state. */
|
|
struct __res_state res;
|
|
|
|
/* This member must be last. */
|
|
char end_padding[];
|
|
|
|
#define PTHREAD_STRUCT_END_PADDING \
|
|
(sizeof (struct pthread) - offsetof (struct pthread, end_padding))
|
|
} __attribute ((aligned (TCB_ALIGNMENT)));
|
|
|
|
|
|
#endif /* descr.h */
|