2000-05-05  H.J. Lu  <hjl@gnu.org>

	* sysdeps/ia64/pt-machine.h (__compare_and_swap): Change it to
	have acquire semantics.
	(__compare_and_swap_with_release_semantics): New inline
	function.
	(HAS_COMPARE_AND_SWAP_WITH_RELEASE_SEMANTICS): New macro.

2000-01-28  Hans Boehm  <hboehm@exch.hpl.hp.com>

	* manager.c: Fix the problem with signals at startup.
	Change the way that thread stacks are allocated on IA64.
	Clean up some of the guard page allocation stuff.

1999-12-19  H.J. Lu  <hjl@gnu.org>

	* internals.h (page_roundup): New.
	* attr.c (__pthread_attr_setguardsize): Use page_roundup
	instead of roundup.
	* manager.c (pthread_allocate_stack): Make sure guardaddr is
	page aligned with page_roundup if NEED_SEPARATE_REGISTER_STACK
	is defined.

1999-12-17  Hans Boehm  <hboehm@exch.hpl.hp.com>

	* manager.c (pthread_allocate_stack): Unmap the stack top
	if failed to map the stack bottom.
	Fix the guard page.
	(pthread_free): Fix the guard page.

	* pthread.c (pthread_initialize): Set rlimit correctly for
	NEED_SEPARATE_REGISTER_STACK.

1999-12-16  H.J. Lu  <hjl@gnu.org>

	* pthread.c (__pthread_initialize_manager): Pass
	__pthread_manager_thread_bos instead of
	__pthread_manager_thread_tos to __clone2.

1999-12-16  H.J. Lu  <hjl@gnu.org>

	* manager.c (pthread_allocate_stack): Correct the calculation
	of "new_thread_bottom". Remove MAP_GROWSDOWN from mmap for
	stack bottom.

1999-12-13  H.J. Lu  <hjl@gnu.org>

	* sysdeps/ia64/pt-machine.h (__compare_and_swap): Added a stop
	bit after setting ar.ccv.

1999-12-12  H.J. Lu  <hjl@gnu.org>

	* manager.c (pthread_allocate_stack): Make the starting
	address of the stack bottom page aligned. FIXME: it may
	need changes in other places.
	(pthread_handle_create): Likewise.

1999-12-11  Hans Boehm  <hboehm@exch.hpl.hp.com>

	* manager.c (pthread_allocate_stack): Handle
	NEED_SEPARATE_REGISTER_STACK.
	(pthread_handle_create): Likewise.
	* pthread.c (__pthread_initialize_manager): Likewise.

	* sysdeps/ia64/pt-machine.h: Use r13 for thread pointer.

1999-12-02  H.J. Lu  <hjl@gnu.org>

	* sysdeps/ia64/pt-machine.h: New.
This commit is contained in:
Ulrich Drepper 2000-07-15 19:02:47 +00:00
parent 101c92fc39
commit 9aae19cd9a
5 changed files with 352 additions and 23 deletions

View File

@ -1,3 +1,73 @@
2000-05-05 H.J. Lu <hjl@gnu.org>
* sysdeps/ia64/pt-machine.h (__compare_and_swap): Change it to
have acquire semantics.
(__compare_and_swap_with_release_semantics): New inline
function.
(HAS_COMPARE_AND_SWAP_WITH_RELEASE_SEMANTICS): New macro.
2000-01-28 Hans Boehm <hboehm@exch.hpl.hp.com>
* manager.c: Fix the problem with signals at startup.
Change the way that thread stacks are allocated on IA64.
Clean up some of the guard page allocation stuff.
1999-12-19 H.J. Lu <hjl@gnu.org>
* internals.h (page_roundup): New.
* attr.c (__pthread_attr_setguardsize): Use page_roundup
instead of roundup.
* manager.c (pthread_allocate_stack): Make sure guardaddr is
page aligned with page_roundup if NEED_SEPARATE_REGISTER_STACK
is defined.
1999-12-17 Hans Boehm <hboehm@exch.hpl.hp.com>
* manager.c (pthread_allocate_stack): Unmap the stack top
if failed to map the stack bottom.
Fix the guard page.
(pthread_free): Fix the guard page.
* pthread.c (pthread_initialize): Set rlimit correctly for
NEED_SEPARATE_REGISTER_STACK.
1999-12-16 H.J. Lu <hjl@gnu.org>
* pthread.c (__pthread_initialize_manager): Pass
__pthread_manager_thread_bos instead of
__pthread_manager_thread_tos to __clone2.
1999-12-16 H.J. Lu <hjl@gnu.org>
* manager.c (pthread_allocate_stack): Correct the calculation
of "new_thread_bottom". Remove MAP_GROWSDOWN from mmap for
stack bottom.
1999-12-13 H.J. Lu <hjl@gnu.org>
* sysdeps/ia64/pt-machine.h (__compare_and_swap): Added a stop
bit after setting ar.ccv.
1999-12-12 H.J. Lu <hjl@gnu.org>
* manager.c (pthread_allocate_stack): Make the starting
address of the stack bottom page aligned. FIXME: it may
need changes in other places.
(pthread_handle_create): Likewise.
1999-12-11 Hans Boehm <hboehm@exch.hpl.hp.com>
* manager.c (pthread_allocate_stack): Handle
NEED_SEPARATE_REGISTER_STACK.
(pthread_handle_create): Likewise.
* pthread.c (__pthread_initialize_manager): Likewise.
* sysdeps/ia64/pt-machine.h: Use r13 for thread pointer.
1999-12-02 H.J. Lu <hjl@gnu.org>
* sysdeps/ia64/pt-machine.h: New.
2000-07-13 Ulrich Drepper <drepper@redhat.com>
* wrapsyscall.c: Mark non-__ protected names as weak.

View File

@ -311,6 +311,11 @@ static inline int nonexisting_handle(pthread_handle h, pthread_t id)
/* Fill in defaults left unspecified by pt-machine.h. */
/* We round up a value with page size. */
#ifndef page_roundup
#define page_roundup(v,p) ((((size_t) (v)) + (p) - 1) & ~((p) - 1))
#endif
/* The page size we can get from the system. This should likely not be
changed by the machine file but, you never know. */
#ifndef PAGE_SIZE

View File

@ -82,6 +82,13 @@ static int main_thread_exiting = 0;
static pthread_t pthread_threads_counter = 0;
#ifdef NEED_SEPARATE_REGISTER_STACK
/* Signal masks for the manager. These have to be global only when clone2
is used since it's currently broken wrt signals in the child. */
static sigset_t manager_mask; /* Manager normal signal mask */
static sigset_t manager_mask_all; /* All bits set. */
#endif
/* Forward declarations */
static int pthread_handle_create(pthread_t *thread, const pthread_attr_t *attr,
@ -100,7 +107,9 @@ int __pthread_manager(void *arg)
{
int reqfd = (int) (long int) arg;
struct pollfd ufd;
sigset_t mask;
#ifndef NEED_SEPARATE_REGISTER_STACK
sigset_t manager_mask;
#endif
int n;
struct pthread_request request;
@ -112,12 +121,15 @@ int __pthread_manager(void *arg)
__pthread_manager_thread.p_errnop = &__pthread_manager_thread.p_errno;
__pthread_manager_thread.p_h_errnop = &__pthread_manager_thread.p_h_errno;
/* Block all signals except __pthread_sig_cancel and SIGTRAP */
sigfillset(&mask);
sigdelset(&mask, __pthread_sig_cancel); /* for thread termination */
sigdelset(&mask, SIGTRAP); /* for debugging purposes */
sigfillset(&manager_mask);
sigdelset(&manager_mask, __pthread_sig_cancel); /* for thread termination */
sigdelset(&manager_mask, SIGTRAP); /* for debugging purposes */
if (__pthread_threads_debug && __pthread_sig_debug > 0)
sigdelset(&mask, __pthread_sig_debug);
sigprocmask(SIG_SETMASK, &mask, NULL);
sigdelset(&manager_mask, __pthread_sig_debug);
sigprocmask(SIG_SETMASK, &manager_mask, NULL);
#ifdef NEED_SEPARATE_REGISTER_STACK
sigfillset(&manager_mask_all);
#endif
/* Raise our priority to match that of main thread */
__pthread_manager_adjust_prio(__pthread_main_thread->p_priority);
/* Synchronize debugging of the thread manager */
@ -294,7 +306,16 @@ static int pthread_allocate_stack(const pthread_attr_t *attr,
if (attr != NULL && attr->__stackaddr_set)
{
/* The user provided a stack. */
/* The user provided a stack. For now we interpret the supplied
address as 1 + the highest addr. in the stack segment. If a
separate register stack is needed, we place it at the low end
of the segment, relying on the associated stacksize to
determine the low end of the segment. This differs from many
(but not all) other pthreads implementations. The intent is
that on machines with a single stack growing toward higher
addresses, stackaddr would be the lowest address in the stack
segment, so that it is consistently close to the initial sp
value. */
new_thread =
(pthread_descr) ((long)(attr->__stackaddr) & -sizeof(void *)) - 1;
new_thread_bottom = (char *) attr->__stackaddr - attr->__stacksize;
@ -304,11 +325,57 @@ static int pthread_allocate_stack(const pthread_attr_t *attr,
}
else
{
stacksize = STACK_SIZE - pagesize;
if (attr != NULL)
stacksize = MIN (stacksize, roundup(attr->__stacksize, pagesize));
#ifdef NEED_SEPARATE_REGISTER_STACK
size_t granularity = 2 * pagesize;
/* Try to make stacksize/2 a multiple of pagesize */
#else
size_t granularity = pagesize;
#endif
/* Allocate space for stack and thread descriptor at default address */
if (attr != NULL)
{
guardsize = page_roundup (attr->__guardsize, granularity);
stacksize = STACK_SIZE - guardsize;
stacksize = MIN (stacksize,
page_roundup (attr->__stacksize, granularity));
}
else
{
guardsize = granularity;
stacksize = STACK_SIZE - granularity;
}
new_thread = default_new_thread;
#ifdef NEED_SEPARATE_REGISTER_STACK
new_thread_bottom = (char *) (new_thread + 1) - stacksize - guardsize;
/* Includes guard area, unlike the normal case. Use the bottom
end of the segment as backing store for the register stack.
Needed on IA64. In this case, we also map the entire stack at
once. According to David Mosberger, that's cheaper. It also
avoids the risk of intermittent failures due to other mappings
in the same region. The cost is that we might be able to map
slightly fewer stacks. */
/* First the main stack: */
if (mmap((caddr_t)((char *)(new_thread + 1) - stacksize / 2),
stacksize / 2, PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0)
== MAP_FAILED)
/* Bad luck, this segment is already mapped. */
return -1;
/* Then the register stack: */
if (mmap((caddr_t)new_thread_bottom, stacksize/2,
PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0)
== MAP_FAILED)
{
munmap((caddr_t)((char *)(new_thread + 1) - stacksize/2),
stacksize/2);
return -1;
}
guardaddr = new_thread_bottom + stacksize/2;
/* We leave the guard area in the middle unmapped. */
#else /* !NEED_SEPARATE_REGISTER_STACK */
new_thread_bottom = (char *) (new_thread + 1) - stacksize;
if (mmap((caddr_t)((char *)(new_thread + 1) - INITIAL_STACK_SIZE),
INITIAL_STACK_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC,
@ -317,10 +384,10 @@ static int pthread_allocate_stack(const pthread_attr_t *attr,
/* Bad luck, this segment is already mapped. */
return -1;
/* We manage to get a stack. Now see whether we need a guard
and allocate it if necessary. Notice that the default
attributes (stack_size = STACK_SIZE - pagesize) do not need
a guard page, since the RLIMIT_STACK soft limit prevents stacks
from running into one another. */
and allocate it if necessary. Notice that the default
attributes (stack_size = STACK_SIZE - pagesize and guardsize
= pagesize) do not need a guard page, since the RLIMIT_STACK
soft limit prevents stacks from running into one another. */
if (stacksize == STACK_SIZE - pagesize)
{
/* We don't need a guard page. */
@ -330,7 +397,6 @@ static int pthread_allocate_stack(const pthread_attr_t *attr,
else
{
/* Put a bad page at the bottom of the stack */
guardsize = attr->__guardsize;
guardaddr = (void *)new_thread_bottom - guardsize;
if (mmap ((caddr_t) guardaddr, guardsize, 0, MAP_FIXED, -1, 0)
== MAP_FAILED)
@ -340,6 +406,7 @@ static int pthread_allocate_stack(const pthread_attr_t *attr,
guardsize = 0;
}
}
#endif /* !NEED_SEPARATE_REGISTER_STACK */
}
/* Clear the thread data structure. */
memset (new_thread, '\0', sizeof (*new_thread));
@ -452,9 +519,30 @@ static int pthread_handle_create(pthread_t *thread, const pthread_attr_t *attr,
__pthread_lock(new_thread->p_lock, NULL);
/* We have to report this event. */
#ifdef NEED_SEPARATE_REGISTER_STACK
/* Perhaps this version should be used on all platforms. But
this requires that __clone2 be uniformly supported
everywhere.
And there is some argument for changing the __clone2
interface to pass sp and bsp instead, making it more IA64
specific, but allowing stacks to grow outward from each
other, to get less paging and fewer mmaps. Clone2
currently can't take signals in the child right after
process creation. Mask them in the child. It resets the
mask once it starts up. */
sigprocmask(SIG_SETMASK, &manager_mask_all, NULL);
pid = __clone2(pthread_start_thread_event,
(void **)new_thread_bottom,
(char *)new_thread - new_thread_bottom,
CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
__pthread_sig_cancel, new_thread);
sigprocmask(SIG_SETMASK, &manager_mask, NULL);
#else
pid = __clone(pthread_start_thread_event, (void **) new_thread,
CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
__pthread_sig_cancel, new_thread);
#endif
if (pid != -1)
{
/* Now fill in the information about the new thread in
@ -479,18 +567,38 @@ static int pthread_handle_create(pthread_t *thread, const pthread_attr_t *attr,
}
}
if (pid == 0)
pid = __clone(pthread_start_thread, (void **) new_thread,
CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
__pthread_sig_cancel, new_thread);
{
#ifdef NEED_SEPARATE_REGISTER_STACK
sigprocmask(SIG_SETMASK, &manager_mask_all, NULL);
pid = __clone2(pthread_start_thread,
(void **)new_thread_bottom,
(char *)new_thread - new_thread_bottom,
CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
__pthread_sig_cancel, new_thread);
sigprocmask(SIG_SETMASK, &manager_mask, NULL);
#else
pid = __clone(pthread_start_thread, (void **) new_thread,
CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
__pthread_sig_cancel, new_thread);
#endif /* !NEED_SEPARATE_REGISTER_STACK */
}
/* Check if cloning succeeded */
if (pid == -1) {
/* Free the stack if we allocated it */
if (attr == NULL || !attr->__stackaddr_set)
{
#ifdef NEED_SEPARATE_REGISTER_STACK
size_t stacksize = ((char *)(new_thread->p_guardaddr)
- new_thread_bottom);
munmap((caddr_t)new_thread_bottom, stacksize);
munmap((caddr_t)new_thread_bottom + stacksize
+ new_thread->p_guardsize, stacksize);
#else
if (new_thread->p_guardsize != 0)
munmap(new_thread->p_guardaddr, new_thread->p_guardsize);
munmap((caddr_t)((char *)(new_thread+1) - INITIAL_STACK_SIZE),
INITIAL_STACK_SIZE);
#endif
}
__pthread_handles[sseg].h_descr = NULL;
__pthread_handles[sseg].h_bottom = NULL;
@ -550,10 +658,27 @@ static void pthread_free(pthread_descr th)
if (th == &__pthread_initial_thread) return;
if (!th->p_userstack)
{
size_t guardsize = th->p_guardsize;
/* Free the stack and thread descriptor area */
if (th->p_guardsize != 0)
munmap(th->p_guardaddr, th->p_guardsize);
#ifdef NEED_SEPARATE_REGISTER_STACK
char *guardaddr = th->p_guardaddr;
/* We unmap exactly what we mapped, in case there was something
else in the same region. Guardaddr is always set, even if
guardsize is 0. This allows us to compute everything else. */
size_t stacksize = (char *)(th+1) - guardaddr - guardsize;
/* Unmap the register stack, which is below guardaddr. */
munmap((caddr_t)(guardaddr-stacksize), stacksize);
/* Unmap the main stack. */
munmap((caddr_t)(guardaddr+guardsize), stacksize);
#else
/* The following assumes that we only allocate stacks of one
size. That's currently true but probably shouldn't be. This
looks like it fails for growing stacks if there was something
else mapped just below the stack? */
if (guardsize != 0)
munmap(th->p_guardaddr, guardsize);
munmap((caddr_t) ((char *)(th+1) - STACK_SIZE), STACK_SIZE);
#endif
}
}

View File

@ -362,7 +362,13 @@ static void pthread_initialize(void)
/* Play with the stack size limit to make sure that no stack ever grows
beyond STACK_SIZE minus one page (to act as a guard page). */
getrlimit(RLIMIT_STACK, &limit);
#ifdef NEED_SEPARATE_REGISTER_STACK
/* STACK_SIZE bytes hold both the main stack and register backing
store. The rlimit value applies to each individually. */
max_stack = STACK_SIZE/2 - __getpagesize();
#else
max_stack = STACK_SIZE - __getpagesize();
#endif
if (limit.rlim_cur > max_stack) {
limit.rlim_cur = max_stack;
setrlimit(RLIMIT_STACK, &limit);
@ -444,10 +450,18 @@ int __pthread_initialize_manager(void)
| __pthread_initial_thread.p_eventbuf.eventmask.event_bits[idx]))
!= 0)
{
#ifdef NEED_SEPARATE_REGISTER_STACK
pid = __clone2(__pthread_manager_event,
(void **) __pthread_manager_thread_bos,
THREAD_MANAGER_STACK_SIZE,
CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND,
(void *)(long)manager_pipe[0]);
#else
pid = __clone(__pthread_manager_event,
(void **) __pthread_manager_thread_tos,
CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND,
(void *)(long)manager_pipe[0]);
#endif
if (pid != -1)
{
@ -472,9 +486,18 @@ int __pthread_initialize_manager(void)
}
if (pid == 0)
pid = __clone(__pthread_manager, (void **) __pthread_manager_thread_tos,
CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND,
(void *)(long)manager_pipe[0]);
{
#ifdef NEED_SEPARATE_REGISTER_STACK
pid = __clone2(__pthread_manager, (void **) __pthread_manager_thread_bos,
THREAD_MANAGER_STACK_SIZE,
CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND,
(void *)(long)manager_pipe[0]);
#else
pid = __clone(__pthread_manager, (void **) __pthread_manager_thread_tos,
CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND,
(void *)(long)manager_pipe[0]);
#endif
}
if (pid == -1) {
free(__pthread_manager_thread_bos);
__libc_close(manager_pipe[0]);

View File

@ -0,0 +1,106 @@
/* Machine-dependent pthreads configuration and inline functions.
IA-64 version.
Copyright (C) 1999, 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#ifndef PT_EI
# define PT_EI extern inline
#endif
/* Make sure gcc doesn't try to be clever and move things around on
us. We need to use _exactly_ the address the user gave us, not some
alias that contains the same information. */
#define __atomic_fool_gcc(x) (*(volatile struct { int a[100]; } *)x)
#ifndef ELF_MACHINE_NAME
#define NEED_SEPARATE_REGISTER_STACK
/* Get some notion of the current stack. Need not be exactly the top
of the stack, just something somewhere in the current frame.
r12 (sp) is the stack pointer. */
#define CURRENT_STACK_FRAME stack_pointer
register char *stack_pointer __asm__ ("sp");
/* Register r13 (tp) is reserved by the ABI as "thread pointer". */
struct _pthread_descr_struct;
register struct _pthread_descr_struct *__thread_self __asm__("r13");
/* Return the thread descriptor for the current thread. */
#define THREAD_SELF __thread_self
/* Initialize the thread-unique value. */
#define INIT_THREAD_SELF(descr, nr) (__thread_self = (descr))
/* Access to data in the thread descriptor is easy. */
#define THREAD_GETMEM(descr, member) __thread_self->member
#define THREAD_GETMEM_NC(descr, member) __thread_self->member
#define THREAD_SETMEM(descr, member, value) __thread_self->member = (value)
#define THREAD_SETMEM_NC(descr, member, value) __thread_self->member = (value)
#define HAS_COMPARE_AND_SWAP_WITH_RELEASE_SEMANTICS
PT_EI long int
__compare_and_swap (long int *p, long int oldval, long int newval)
{
long int readval;
__asm__ __volatile__
("mov ar.ccv=%4;;\n\t"
"cmpxchg8.acq %0=%1,%2,ar.ccv"
: "=r" (readval), "=m" (__atomic_fool_gcc (p))
: "r"(newval), "1" (__atomic_fool_gcc (p)), "r" (oldval)
: "memory");
return readval == oldval;
}
PT_EI long int
__compare_and_swap_with_release_semantics (long int *p,
long int oldval,
long int newval)
{
long int readval;
__asm__ __volatile__
("mov ar.ccv=%4;;\n\t"
"cmpxchg8.rel %0=%1,%2,ar.ccv"
: "=r" (readval), "=m" (__atomic_fool_gcc (p))
: "r"(newval), "1" (__atomic_fool_gcc (p)), "r" (oldval)
: "memory");
return readval == oldval;
}
#endif /* ELF_MACHINE_NAME */
/* Spinlock implementation; required. */
PT_EI long int
testandset (int *spinlock)
{
long int ret;
__asm__ __volatile__(
"xchg4 %0=%1,%2"
: "=r"(ret), "=m"(__atomic_fool_gcc (spinlock))
: "r"(1), "1"(__atomic_fool_gcc (spinlock))
: "memory");
return ret;
}