2004-05-25  Steven Munroe  <sjmunroe@us.ibm.com>

	* sysdeps/powerpc/fpu/Makefile: Make ld.so a dependency of libm.so.
	* sysdeps/powerpc/fpu/bits/mathinline.h [__LIBC_INTERNAL_MATH_INLINES]
	(__ieee754_sqrt): Define as __MATH_INLINE using fsqrt instruction.
	(__ieee754_sqrtf): Define as __MATH_INLINE using fsqrts instruction.
	* sysdeps/powerpc/fpu/e_sqrt.c (__slow_ieee754_sqrt): Moved
	implementation from w_sqrt.c.
	* sysdeps/powerpc/fpu/e_sqrtf.c (__slow_ieee754_sqrtf): Moved
	implementation from w_sqrtf.c.
	* sysdeps/powerpc/fpu/w_sqrt.c (__sqrt): Wrapper implementation
	using inline __ieee754_sqrt().
	* sysdeps/powerpc/fpu/w_sqrtf.c (__sqrtf): Wrapper implementation
	using inline __ieee754_sqrtf().
	* sysdeps/powerpc/powerpc32/sysdep.h [__ASSEMBLER__]: Include
	<sysdeps/powerpc/sysdep.h> independent of __ASSEMBLER__.
	* sysdeps/powerpc/sysdep.h [__ASSEMBLER__] (PPC_FEATURE_*): Define
	PPC_FEATURE_*  independent of __ASSEMBLER__.

2004-05-25  Jakub Jelinek  <jakub@redhat.com>

	* sysdeps/pthread/aio_notify.c: Use <> instead of "" for aio_misc.h
	include.
	(aio_start_notify_thread): Define if not defined.
	(notify_func_wrapper): Use it.
	* sysdeps/pthread/aio_misc.c: Use <> instead of "" for aio_misc.h
	include.
	(aio_create_helper_thread): Define if not defined.
	(__aio_create_helper_thread): New function.
	(__aio_enqueue_request): Use aio_create_helper_thread.

	* nis/ypclnt.c (ypall_data, ypall_foreach): Remove.
	(struct ypresp_all_data): New type.
	(__xdr_ypresp_all): Change second argument to
	struct ypresp_all_data *.  Replace ypall_foreach and
	ypall_data with objp->foreach and objp->data.
	(yp_all): Remove status variable, add data.  Replace
	all uses of status with data.status.  Initialize data.foreach
	and data.data instead of ypall_foreach and ypall_data.

2004-05-24  Jakub Jelinek  <jakub@redhat.com>

	* elf/dl-lookup.c (add_dependency): Set DF_1_NODELETE bit
	in l_flags_1, not in l_flags.
This commit is contained in:
Ulrich Drepper 2004-05-26 04:47:00 +00:00
parent a752d0cc54
commit ffdd5e50e1
14 changed files with 634 additions and 272 deletions

View File

@ -1,3 +1,48 @@
2004-05-25 Steven Munroe <sjmunroe@us.ibm.com>
* sysdeps/powerpc/fpu/Makefile: Make ld.so a dependency of libm.so.
* sysdeps/powerpc/fpu/bits/mathinline.h [__LIBC_INTERNAL_MATH_INLINES]
(__ieee754_sqrt): Define as __MATH_INLINE using fsqrt instruction.
(__ieee754_sqrtf): Define as __MATH_INLINE using fsqrts instruction.
* sysdeps/powerpc/fpu/e_sqrt.c (__slow_ieee754_sqrt): Moved
implementation from w_sqrt.c.
* sysdeps/powerpc/fpu/e_sqrtf.c (__slow_ieee754_sqrtf): Moved
implementation from w_sqrtf.c.
* sysdeps/powerpc/fpu/w_sqrt.c (__sqrt): Wrapper implementation
using inline __ieee754_sqrt().
* sysdeps/powerpc/fpu/w_sqrtf.c (__sqrtf): Wrapper implementation
using inline __ieee754_sqrtf().
* sysdeps/powerpc/powerpc32/sysdep.h [__ASSEMBLER__]: Include
<sysdeps/powerpc/sysdep.h> independent of __ASSEMBLER__.
* sysdeps/powerpc/sysdep.h [__ASSEMBLER__] (PPC_FEATURE_*): Define
PPC_FEATURE_* independent of __ASSEMBLER__.
2004-05-25 Jakub Jelinek <jakub@redhat.com>
* sysdeps/pthread/aio_notify.c: Use <> instead of "" for aio_misc.h
include.
(aio_start_notify_thread): Define if not defined.
(notify_func_wrapper): Use it.
* sysdeps/pthread/aio_misc.c: Use <> instead of "" for aio_misc.h
include.
(aio_create_helper_thread): Define if not defined.
(__aio_create_helper_thread): New function.
(__aio_enqueue_request): Use aio_create_helper_thread.
* nis/ypclnt.c (ypall_data, ypall_foreach): Remove.
(struct ypresp_all_data): New type.
(__xdr_ypresp_all): Change second argument to
struct ypresp_all_data *. Replace ypall_foreach and
ypall_data with objp->foreach and objp->data.
(yp_all): Remove status variable, add data. Replace
all uses of status with data.status. Initialize data.foreach
and data.data instead of ypall_foreach and ypall_data.
2004-05-24 Jakub Jelinek <jakub@redhat.com>
* elf/dl-lookup.c (add_dependency): Set DF_1_NODELETE bit
in l_flags_1, not in l_flags.
2004-04-10 Robert Millan <robertmh@gnu.org> 2004-04-10 Robert Millan <robertmh@gnu.org>
* sysdeps/unix/sysv/linux/bits/in.h: Cosmetic fixes to get in sync * sysdeps/unix/sysv/linux/bits/in.h: Cosmetic fixes to get in sync

View File

@ -618,12 +618,16 @@ yp_order (const char *indomain, const char *inmap, unsigned int *outorder)
return YPERR_SUCCESS; return YPERR_SUCCESS;
} }
static void *ypall_data; struct ypresp_all_data
static int (*ypall_foreach) (int status, char *key, int keylen, {
unsigned long status;
void *data;
int (*foreach) (int status, char *key, int keylen,
char *val, int vallen, char *data); char *val, int vallen, char *data);
};
static bool_t static bool_t
__xdr_ypresp_all (XDR *xdrs, u_long *objp) __xdr_ypresp_all (XDR *xdrs, struct ypresp_all_data *objp)
{ {
while (1) while (1)
{ {
@ -633,13 +637,13 @@ __xdr_ypresp_all (XDR *xdrs, u_long *objp)
if (!xdr_ypresp_all (xdrs, &resp)) if (!xdr_ypresp_all (xdrs, &resp))
{ {
xdr_free ((xdrproc_t) xdr_ypresp_all, (char *) &resp); xdr_free ((xdrproc_t) xdr_ypresp_all, (char *) &resp);
*objp = YP_YPERR; objp->status = YP_YPERR;
return FALSE; return FALSE;
} }
if (resp.more == 0) if (resp.more == 0)
{ {
xdr_free ((xdrproc_t) xdr_ypresp_all, (char *) &resp); xdr_free ((xdrproc_t) xdr_ypresp_all, (char *) &resp);
*objp = YP_NOMORE; objp->status = YP_NOMORE;
return TRUE; return TRUE;
} }
@ -656,24 +660,24 @@ __xdr_ypresp_all (XDR *xdrs, u_long *objp)
But we are allowed to add data behind the buffer, But we are allowed to add data behind the buffer,
if we don't modify the length. So add an extra NUL if we don't modify the length. So add an extra NUL
character to avoid trouble with broken code. */ character to avoid trouble with broken code. */
*objp = YP_TRUE; objp->status = YP_TRUE;
memcpy (key, resp.ypresp_all_u.val.key.keydat_val, keylen); memcpy (key, resp.ypresp_all_u.val.key.keydat_val, keylen);
key[keylen] = '\0'; key[keylen] = '\0';
memcpy (val, resp.ypresp_all_u.val.val.valdat_val, vallen); memcpy (val, resp.ypresp_all_u.val.val.valdat_val, vallen);
val[vallen] = '\0'; val[vallen] = '\0';
xdr_free ((xdrproc_t) xdr_ypresp_all, (char *) &resp); xdr_free ((xdrproc_t) xdr_ypresp_all, (char *) &resp);
if ((*ypall_foreach) (*objp, key, keylen, if ((*objp->foreach) (objp->status, key, keylen,
val, vallen, ypall_data)) val, vallen, objp->data))
return TRUE; return TRUE;
} }
break; break;
default: default:
*objp = resp.ypresp_all_u.val.stat; objp->status = resp.ypresp_all_u.val.stat;
xdr_free ((xdrproc_t) xdr_ypresp_all, (char *) &resp); xdr_free ((xdrproc_t) xdr_ypresp_all, (char *) &resp);
/* Sun says we don't need to make this call, but must return /* Sun says we don't need to make this call, but must return
immediatly. Since Solaris makes this call, we will call immediatly. Since Solaris makes this call, we will call
the callback function, too. */ the callback function, too. */
(*ypall_foreach) (*objp, NULL, 0, NULL, 0, ypall_data); (*objp->foreach) (objp->status, NULL, 0, NULL, 0, objp->data);
return TRUE; return TRUE;
} }
} }
@ -689,7 +693,7 @@ yp_all (const char *indomain, const char *inmap,
enum clnt_stat result; enum clnt_stat result;
struct sockaddr_in clnt_sin; struct sockaddr_in clnt_sin;
CLIENT *clnt; CLIENT *clnt;
unsigned long status; struct ypresp_all_data data;
int clnt_sock; int clnt_sock;
int saved_errno = errno; int saved_errno = errno;
@ -725,12 +729,12 @@ yp_all (const char *indomain, const char *inmap,
req.domain = (char *) indomain; req.domain = (char *) indomain;
req.map = (char *) inmap; req.map = (char *) inmap;
ypall_foreach = incallback->foreach; data.foreach = incallback->foreach;
ypall_data = (void *) incallback->data; data.data = (void *) incallback->data;
result = clnt_call (clnt, YPPROC_ALL, (xdrproc_t) xdr_ypreq_nokey, result = clnt_call (clnt, YPPROC_ALL, (xdrproc_t) xdr_ypreq_nokey,
(caddr_t) &req, (xdrproc_t) __xdr_ypresp_all, (caddr_t) &req, (xdrproc_t) __xdr_ypresp_all,
(caddr_t) &status, RPCTIMEOUT); (caddr_t) &data, RPCTIMEOUT);
if (result != RPC_SUCCESS) if (result != RPC_SUCCESS)
{ {
@ -744,10 +748,10 @@ yp_all (const char *indomain, const char *inmap,
clnt_destroy (clnt); clnt_destroy (clnt);
if (res == YPERR_SUCCESS && status != YP_NOMORE) if (res == YPERR_SUCCESS && data.status != YP_NOMORE)
{ {
__set_errno (saved_errno); __set_errno (saved_errno);
return ypprot_err (status); return ypprot_err (data.status);
} }
++try; ++try;
} }

View File

@ -1,3 +1,7 @@
2004-05-25 Jakub Jelinek <jakub@redhat.com>
* sysdeps/unix/sysv/linux/aio_misc.h: New file.
2004-05-21 Jakub Jelinek <jakub@redhat.com> 2004-05-21 Jakub Jelinek <jakub@redhat.com>
* sysdeps/pthread/pthread_cond_wait.c (__pthread_cond_wait): Compare * sysdeps/pthread/pthread_cond_wait.c (__pthread_cond_wait): Compare

View File

@ -0,0 +1,67 @@
/* Copyright (C) 2004 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Jakub Jelinek <jakub@redhat.com>, 2004.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation; either version 2.1 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#ifndef _AIO_MISC_H
# include_next <aio_misc.h>
# include <signal.h>
# include <sysdep.h>
# include <pthread.h>
# define aio_start_notify_thread __aio_start_notify_thread
# define aio_create_helper_thread __aio_create_helper_thread
/* Install an empty signal mask in the calling thread, i.e. unblock
every signal, using a direct rt_sigprocmask system call.
NOTE(review): judging by the name this is meant to run at the start
of an AIO notification thread, undoing the "all signals blocked"
mask set up around thread creation -- confirm against the caller in
aio_notify.c.  The result of the system call is not examined.  */
extern inline void
__aio_start_notify_thread (void)
{
sigset_t ss;
sigemptyset (&ss);
INTERNAL_SYSCALL_DECL (err);
/* The last argument is the size in bytes of the signal set handed to
the kernel.  */
INTERNAL_SYSCALL (rt_sigprocmask, err, 4, SIG_SETMASK, &ss, NULL, _NSIG / 8);
}
/* Create a detached thread running TF (ARG) with a PTHREAD_STACK_MIN
stack, storing its identifier in *THREADP.  All signals are blocked
in the caller for the duration of pthread_create and the previous
mask is restored afterwards.  Returns the value returned by
pthread_create.  The results of the attribute calls and of the
rt_sigprocmask system calls are not checked.  */
extern inline int
__aio_create_helper_thread (pthread_t *threadp, void *(*tf) (void *), void *arg)
{
pthread_attr_t attr;
/* Make sure the thread is created detached. */
pthread_attr_init (&attr);
pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
/* The helper thread needs only very little resources. */
(void) pthread_attr_setstacksize (&attr, PTHREAD_STACK_MIN);
/* Block all signals in the helper thread. To do this thoroughly we
temporarily have to block all signals here. */
sigset_t ss;
sigset_t oss;
sigfillset (&ss);
INTERNAL_SYSCALL_DECL (err);
/* Save the current mask in OSS while installing the full one.  */
INTERNAL_SYSCALL (rt_sigprocmask, err, 4, SIG_SETMASK, &ss, &oss, _NSIG / 8);
int ret = pthread_create (threadp, &attr, tf, arg);
/* Restore the signal mask. */
INTERNAL_SYSCALL (rt_sigprocmask, err, 4, SIG_SETMASK, &oss, NULL,
_NSIG / 8);
(void) pthread_attr_destroy (&attr);
return ret;
}
#endif

View File

@ -1,3 +1,6 @@
ifeq ($(subdir),math) ifeq ($(subdir),math)
libm-support += fenv_const fe_nomask t_sqrt libm-support += fenv_const fe_nomask t_sqrt
# libm needs ld.so to access dl_hwcap
$(objpfx)libm.so: $(elfobjdir)/ld.so
endif endif

View File

@ -121,4 +121,56 @@ fdimf (float __x, float __y) __THROW
#endif /* __USE_ISOC99 */ #endif /* __USE_ISOC99 */
#endif /* !__NO_MATH_INLINES && __OPTIMIZE__ */ #endif /* !__NO_MATH_INLINES && __OPTIMIZE__ */
/* This code is used internally in the GNU libc. */
# ifdef __LIBC_INTERNAL_MATH_INLINES
#include <sysdep.h>
#include <ldsodefs.h>
#include <dl-procinfo.h>
extern double __slow_ieee754_sqrt (double);
/* Inline double-precision square root for use inside libc.  Uses the
fsqrt instruction when the PPC_FEATURE_64 bit is set in the
run-time hardware capability word, and otherwise calls the software
implementation __slow_ieee754_sqrt.  */
__MATH_INLINE double
__ieee754_sqrt (double __x)
{
double __z;
/* If the CPU is 64-bit we can use the optional FP instructions.  */
if ((GLRO(dl_hwcap) & PPC_FEATURE_64) != 0)
{
/* Volatile is required to prevent the compiler from moving the
fsqrt instruction above the branch. */
__asm __volatile (
" fsqrt %0,%1\n"
: "=f" (__z)
: "f" (__x));
}
else
__z = __slow_ieee754_sqrt(__x);
return __z;
}
extern float __slow_ieee754_sqrtf (float);
/* Inline single-precision square root for use inside libc.  Uses the
fsqrts instruction when the PPC_FEATURE_64 bit is set in the
run-time hardware capability word, and otherwise calls the software
implementation __slow_ieee754_sqrtf.  */
__MATH_INLINE float
__ieee754_sqrtf (float __x)
{
float __z;
/* If the CPU is 64-bit we can use the optional FP instructions.  */
if ((GLRO(dl_hwcap) & PPC_FEATURE_64) != 0)
{
/* Volatile is required to prevent the compiler from moving the
fsqrts instruction above the branch. */
__asm __volatile (
" fsqrts %0,%1\n"
: "=f" (__z)
: "f" (__x));
}
else
__z = __slow_ieee754_sqrtf(__x);
return __z;
}
# endif /* __LIBC_INTERNAL_MATH_INLINES */
#endif /* __GNUC__ && !_SOFT_FLOAT */ #endif /* __GNUC__ && !_SOFT_FLOAT */

View File

@ -1 +1,185 @@
/* __ieee754_sqrt is in w_sqrt.c */ /* Double-precision floating point square root.
Copyright (C) 1997, 2002, 2003, 2004 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <math.h>
#include <math_private.h>
#include <fenv_libc.h>
#include <inttypes.h>
#include <sysdep.h>
#include <ldsodefs.h>
#include <dl-procinfo.h>
/* Slightly more than one half; the iteration below measures the error
of sy * sg against this value rather than against exactly 0.5.  */
static const double almost_half = 0.5000000000000001; /* 0.5 + 2^-53 */
/* Single-precision bit patterns: 0x7fc00000 is a quiet NaN and
0x7f800000 is +Infinity.  */
static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
/* Scale factors used for denormal inputs: 2^108 and 2^-54, so that
sqrt (x * two108) * twom54 == sqrt (x).  */
static const float two108 = 3.245185536584267269e+32;
static const float twom54 = 5.551115123125782702e-17;
/* Table of initial guesses, defined in another file.  It is read in
pairs: an even-indexed entry and the entry that follows it.  */
extern const float __t_sqrt[1024];
/* The method is based on a description in
Computation of elementary functions on the IBM RISC System/6000 processor,
P. W. Markstein, IBM J. Res. Develop, 34(1) 1990.
Basically, it consists of two interleaved Newton-Raphson approximations,
one to find the actual square root, and one to find its reciprocal
without the expense of a division operation. The tricky bit here
is the use of the POWER/PowerPC multiply-add operation to get the
required accuracy with high speed.
The argument reduction works by a combination of table lookup to
obtain the initial guesses, and some careful modification of the
generated guesses (which mostly runs on the integer unit, while the
Newton-Raphson is running on the FPU). */
/* Software double-precision square root, called by __ieee754_sqrt when
the hardware fsqrt instruction is not selected.

For finite x > 0 the result is computed from a table-seeded guess
refined by the interleaved iterations below; inputs whose exponent
field is zero (denormals) are rescaled by 2^108, passed through
this function again, and the result multiplied by 2^-54.  For
x < 0 an invalid-operation exception is raised and a NaN is
produced.  Every input that does not return early (zero, +Inf, NaN,
and the NaN substituted for negative x) goes through the final
f_wash call.  */
#ifdef __STDC__
double
__slow_ieee754_sqrt (double x)
#else
double
__slow_ieee754_sqrt (x)
double x;
#endif
{
const float inf = a_inf.value;
if (x > 0)
{
/* schedule the EXTRACT_WORDS to get separation between the store
and the load. */
ieee_double_shape_type ew_u;
ieee_double_shape_type iw_u;
ew_u.value = (x);
if (x != inf)
{
/* Variables named starting with 's' exist in the
argument-reduced space, so that 2 > sx >= 0.5,
1.41... > sg >= 0.70.., 0.70.. >= sy > 0.35... .
Variables named ending with 'i' are integer versions of
floating-point values. */
double sx; /* The value of which we're trying to find the
square root. */
double sg, g; /* Guess of the square root of x. */
double sd, d; /* Difference between the square of the guess and x. */
double sy; /* Estimate of 1/2g (overestimated by 1ulp). */
double sy2; /* 2*sy */
double e; /* Difference between y*g and 1/2 (se = e * fsy). */
double shx; /* == sx * fsg */
double fsg; /* sg*fsg == g. */
fenv_t fe; /* Saved floating-point environment (stores rounding
mode and whether the inexact exception is
enabled). */
uint32_t xi0, xi1, sxi, fsgi;
const float *t_sqrt;
fe = fegetenv_register ();
/* complete the EXTRACT_WORDS (xi0,xi1,x) operation. */
xi0 = ew_u.parts.msw;
xi1 = ew_u.parts.lsw;
/* NOTE(review): relax_fenv_state is defined elsewhere; presumably it
loosens the FP environment for the intermediate arithmetic, with FE
restored before returning -- confirm in fenv_libc.h.  */
relax_fenv_state ();
/* Keep the mantissa and low exponent bit of x but force the rest of
the exponent so that sx lands in the reduced range described
above.  */
sxi = (xi0 & 0x3fffffff) | 0x3fe00000;
/* schedule the INSERT_WORDS (sx, sxi, xi1) to get separation
between the store and the load. */
iw_u.parts.msw = sxi;
iw_u.parts.lsw = xi1;
/* The 0x3fe mask yields an even index, so t_sqrt[0] and t_sqrt[1]
form a pair: the initial guess and the initial 1/(2g) estimate.  */
t_sqrt = __t_sqrt + (xi0 >> (52 - 32 - 8 - 1) & 0x3fe);
sg = t_sqrt[0];
sy = t_sqrt[1];
/* complete the INSERT_WORDS (sx, sxi, xi1) operation. */
sx = iw_u.value;
/* Here we have three Newton-Raphson iterations each of a
division and a square root and the remainder of the
argument reduction, all interleaved. */
sd = -(sg * sg - sx);
/* High word of the scale factor fsg that maps the reduced result
back to the magnitude of x (sg * fsg == g).  */
fsgi = (xi0 + 0x40000000) >> 1 & 0x7ff00000;
sy2 = sy + sy;
sg = sy * sd + sg; /* 16-bit approximation to sqrt(sx). */
/* schedule the INSERT_WORDS (fsg, fsgi, 0) to get separation
between the store and the load. */
/* NOTE(review): the INSERT_WORDS call below looks redundant with the
hand-scheduled iw_u stores that follow it: fsg is not read before
it is assigned again from iw_u.value further down, and the denorm
path never reads it.  Confirm before removing.  */
INSERT_WORDS (fsg, fsgi, 0);
iw_u.parts.msw = fsgi;
iw_u.parts.lsw = (0);
e = -(sy * sg - almost_half);
sd = -(sg * sg - sx);
/* A zero exponent field with x > 0 means x is denormal.  */
if ((xi0 & 0x7ff00000) == 0)
goto denorm;
sy = sy + e * sy2;
sg = sg + sy * sd; /* 32-bit approximation to sqrt(sx). */
sy2 = sy + sy;
/* complete the INSERT_WORDS (fsg, fsgi, 0) operation. */
fsg = iw_u.value;
e = -(sy * sg - almost_half);
sd = -(sg * sg - sx);
sy = sy + e * sy2;
shx = sx * fsg;
sg = sg + sy * sd; /* 64-bit approximation to sqrt(sx),
but perhaps rounded incorrectly. */
sy2 = sy + sy;
g = sg * fsg;
e = -(sy * sg - almost_half);
d = -(g * sg - shx);
sy = sy + e * sy2;
/* Restore the caller's FP environment before the final operation.  */
fesetenv_register (fe);
return g + sy * d;
denorm:
/* For denormalised numbers, we normalise, calculate the
square root, and return an adjusted result. */
fesetenv_register (fe);
return __slow_ieee754_sqrt (x * two108) * twom54;
}
}
else if (x < 0)
{
/* For some reason, some PowerPC32 processors don't implement
FE_INVALID_SQRT. */
#ifdef FE_INVALID_SQRT
feraiseexcept (FE_INVALID_SQRT);
/* Fall back to the generic invalid exception only if raising the
specific one did not set FE_INVALID.  */
if (!fetestexcept (FE_INVALID))
#endif
feraiseexcept (FE_INVALID);
x = a_nan.value;
}
/* Reached for zero, +Inf, NaN and the NaN substituted above.  */
return f_wash (x);
}
/* Out-of-line double-precision square root.  Uses the fsqrt
instruction when the PPC_FEATURE_64 bit is set in the run-time
hardware capability word, and otherwise calls the software
implementation __slow_ieee754_sqrt.  */
#ifdef __STDC__
double
__ieee754_sqrt (double x)
#else
double
__ieee754_sqrt (x)
double x;
#endif
{
double z;
/* If the CPU is 64-bit we can use the optional FP instructions.  */
if ((GLRO (dl_hwcap) & PPC_FEATURE_64) != 0)
{
/* Volatile is required to prevent the compiler from moving the
fsqrt instruction above the branch. */
__asm __volatile (" fsqrt %0,%1\n"
:"=f" (z):"f" (x));
}
else
z = __slow_ieee754_sqrt (x);
return z;
}

View File

@ -1 +1,162 @@
/* __ieee754_sqrtf is in w_sqrtf.c */ /* Single-precision floating point square root.
Copyright (C) 1997, 2003, 2004 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <math.h>
#include <math_private.h>
#include <fenv_libc.h>
#include <inttypes.h>
#include <sysdep.h>
#include <ldsodefs.h>
#include <dl-procinfo.h>
/* Slightly more than one half; the iteration below measures the error
of sy * sg against this value rather than against exactly 0.5.  */
static const float almost_half = 0.50000006; /* 0.5 + 2^-24 */
/* Single-precision bit patterns: 0x7fc00000 is a quiet NaN and
0x7f800000 is +Infinity.  */
static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
/* Scale factors used for denormal inputs: 2^48 and 2^-24, so that
sqrtf (x * two48) * twom24 == sqrtf (x).  */
static const float two48 = 281474976710656.0;
static const float twom24 = 5.9604644775390625e-8;
/* Table of initial guesses, defined in another file.  It is read in
pairs: an even-indexed entry and the entry that follows it.  */
extern const float __t_sqrt[1024];
/* The method is based on a description in
Computation of elementary functions on the IBM RISC System/6000 processor,
P. W. Markstein, IBM J. Res. Develop, 34(1) 1990.
Basically, it consists of two interleaved Newton-Raphson approximations,
one to find the actual square root, and one to find its reciprocal
without the expense of a division operation. The tricky bit here
is the use of the POWER/PowerPC multiply-add operation to get the
required accuracy with high speed.
The argument reduction works by a combination of table lookup to
obtain the initial guesses, and some careful modification of the
generated guesses (which mostly runs on the integer unit, while the
Newton-Raphson is running on the FPU). */
/* Software single-precision square root, called by __ieee754_sqrtf
when the hardware fsqrts instruction is not selected.

For finite x > 0 the result is computed from a table-seeded guess
refined by the interleaved iterations below; inputs whose exponent
field is zero (denormals) are rescaled by 2^48, passed through this
function again, and the result multiplied by 2^-24.  For x < 0 an
invalid-operation exception is raised and a NaN is produced.  Every
input that does not return early (zero, +Inf, NaN, and the NaN
substituted for negative x) goes through the final f_washf call.  */
#ifdef __STDC__
float
__slow_ieee754_sqrtf (float x)
#else
float
__slow_ieee754_sqrtf (x)
float x;
#endif
{
const float inf = a_inf.value;
if (x > 0)
{
if (x != inf)
{
/* Variables named starting with 's' exist in the
argument-reduced space, so that 2 > sx >= 0.5,
1.41... > sg >= 0.70.., 0.70.. >= sy > 0.35... .
Variables named ending with 'i' are integer versions of
floating-point values. */
float sx; /* The value of which we're trying to find the square
root. */
float sg, g; /* Guess of the square root of x. */
float sd, d; /* Difference between the square of the guess and x. */
float sy; /* Estimate of 1/2g (overestimated by 1ulp). */
float sy2; /* 2*sy */
float e; /* Difference between y*g and 1/2 (note that e==se). */
float shx; /* == sx * fsg */
float fsg; /* sg*fsg == g. */
fenv_t fe; /* Saved floating-point environment (stores rounding
mode and whether the inexact exception is
enabled). */
uint32_t xi, sxi, fsgi;
const float *t_sqrt;
GET_FLOAT_WORD (xi, x);
fe = fegetenv_register ();
/* NOTE(review): relax_fenv_state is defined elsewhere; presumably it
loosens the FP environment for the intermediate arithmetic, with FE
restored before returning -- confirm in fenv_libc.h.  */
relax_fenv_state ();
/* Keep the mantissa and low exponent bit of x but force the rest of
the exponent so that sx lands in the reduced range described
above.  */
sxi = (xi & 0x3fffffff) | 0x3f000000;
SET_FLOAT_WORD (sx, sxi);
/* The 0x3fe mask yields an even index, so t_sqrt[0] and t_sqrt[1]
form a pair: the initial guess and the initial 1/(2g) estimate.  */
t_sqrt = __t_sqrt + (xi >> (23 - 8 - 1) & 0x3fe);
sg = t_sqrt[0];
sy = t_sqrt[1];
/* Here we have three Newton-Raphson iterations each of a
division and a square root and the remainder of the
argument reduction, all interleaved. */
sd = -(sg * sg - sx);
/* Bit pattern of the scale factor fsg that maps the reduced result
back to the magnitude of x (sg * fsg == g).  */
fsgi = (xi + 0x40000000) >> 1 & 0x7f800000;
sy2 = sy + sy;
sg = sy * sd + sg; /* 16-bit approximation to sqrt(sx). */
e = -(sy * sg - almost_half);
SET_FLOAT_WORD (fsg, fsgi);
sd = -(sg * sg - sx);
sy = sy + e * sy2;
/* A zero exponent field with x > 0 means x is denormal.  */
if ((xi & 0x7f800000) == 0)
goto denorm;
shx = sx * fsg;
sg = sg + sy * sd; /* 32-bit approximation to sqrt(sx),
but perhaps rounded incorrectly. */
sy2 = sy + sy;
g = sg * fsg;
e = -(sy * sg - almost_half);
d = -(g * sg - shx);
sy = sy + e * sy2;
/* Restore the caller's FP environment before the final operation.  */
fesetenv_register (fe);
return g + sy * d;
denorm:
/* For denormalised numbers, we normalise, calculate the
square root, and return an adjusted result. */
fesetenv_register (fe);
return __slow_ieee754_sqrtf (x * two48) * twom24;
}
}
else if (x < 0)
{
/* For some reason, some PowerPC32 processors don't implement
FE_INVALID_SQRT. */
#ifdef FE_INVALID_SQRT
feraiseexcept (FE_INVALID_SQRT);
/* Fall back to the generic invalid exception only if raising the
specific one did not set FE_INVALID.  */
if (!fetestexcept (FE_INVALID))
#endif
feraiseexcept (FE_INVALID);
x = a_nan.value;
}
/* Reached for zero, +Inf, NaN and the NaN substituted above.  */
return f_washf (x);
}
/* Out-of-line single-precision square root.  Uses the fsqrts
   instruction when the PPC_FEATURE_64 bit is set in the run-time
   hardware capability word, and otherwise calls the software
   implementation __slow_ieee754_sqrtf.  Returns the square root of X
   as computed by whichever path was taken.  */
#ifdef __STDC__
float
__ieee754_sqrtf (float x)
#else
float
__ieee754_sqrtf (x)
     float x;
#endif
{
  /* Single precision, matching the function's return type; both
     fsqrts and __slow_ieee754_sqrtf already produce a float-valued
     result, so no wider temporary is needed.  */
  float z;

  /* If the CPU is 64-bit we can use the optional FP instructions.  */
  if ((GLRO (dl_hwcap) & PPC_FEATURE_64) != 0)
    {
      /* Volatile is required to prevent the compiler from moving the
         fsqrts instruction above the branch.  */
      __asm __volatile (" fsqrts %0,%1\n"
                        :"=f" (z):"f" (x));
    }
  else
    z = __slow_ieee754_sqrtf (x);

  return z;
}

View File

@ -1,5 +1,5 @@
/* Double-precision floating point square root. /* Double-precision floating point square root wrapper.
Copyright (C) 1997, 2002, 2003 Free Software Foundation, Inc. Copyright (C) 2004 Free Software Foundation, Inc.
This file is part of the GNU C Library. This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or The GNU C Library is free software; you can redistribute it and/or
@ -17,130 +17,35 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */ 02111-1307 USA. */
#include <math.h> #include "math.h"
#include <math_private.h> #include "math_private.h"
#include <fenv_libc.h> #include <fenv_libc.h>
#include <inttypes.h>
static const double almost_half = 0.5000000000000001; /* 0.5 + 2^-53 */ #ifdef __STDC__
static const ieee_float_shape_type a_nan = { .word = 0x7fc00000 };
static const ieee_float_shape_type a_inf = { .word = 0x7f800000 };
static const float two108 = 3.245185536584267269e+32;
static const float twom54 = 5.551115123125782702e-17;
extern const float __t_sqrt[1024];
/* The method is based on a description in
Computation of elementary functions on the IBM RISC System/6000 processor,
P. W. Markstein, IBM J. Res. Develop, 34(1) 1990.
Basically, it consists of two interleaved Newton-Rhapson approximations,
one to find the actual square root, and one to find its reciprocal
without the expense of a division operation. The tricky bit here
is the use of the POWER/PowerPC multiply-add operation to get the
required accuracy with high speed.
The argument reduction works by a combination of table lookup to
obtain the initial guesses, and some careful modification of the
generated guesses (which mostly runs on the integer unit, while the
Newton-Rhapson is running on the FPU). */
double double
__sqrt(double x) __sqrt (double x) /* wrapper sqrt */
#else
double
__sqrt (x) /* wrapper sqrt */
double x;
#endif
{ {
const float inf = a_inf.value; #ifdef _IEEE_LIBM
/* x = f_wash(x); *//* This ensures only one exception for SNaN. */ return __ieee754_sqrt (x);
if (x > 0) #else
{ double z;
if (x != inf) z = __ieee754_sqrt (x);
{ if (_LIB_VERSION == _IEEE_ || (x != x))
/* Variables named starting with 's' exist in the return z;
argument-reduced space, so that 2 > sx >= 0.5,
1.41... > sg >= 0.70.., 0.70.. >= sy > 0.35... .
Variables named ending with 'i' are integer versions of
floating-point values. */
double sx; /* The value of which we're trying to find the
square root. */
double sg,g; /* Guess of the square root of x. */
double sd,d; /* Difference between the square of the guess and x. */
double sy; /* Estimate of 1/2g (overestimated by 1ulp). */
double sy2; /* 2*sy */
double e; /* Difference between y*g and 1/2 (se = e * fsy). */
double shx; /* == sx * fsg */
double fsg; /* sg*fsg == g. */
fenv_t fe; /* Saved floating-point environment (stores rounding
mode and whether the inexact exception is
enabled). */
uint32_t xi0, xi1, sxi, fsgi;
const float *t_sqrt;
fe = fegetenv_register(); if (x < 0.0)
EXTRACT_WORDS (xi0,xi1,x); return __kernel_standard (x, x, 26); /* sqrt(negative) */
relax_fenv_state();
sxi = (xi0 & 0x3fffffff) | 0x3fe00000;
INSERT_WORDS (sx, sxi, xi1);
t_sqrt = __t_sqrt + (xi0 >> (52-32-8-1) & 0x3fe);
sg = t_sqrt[0];
sy = t_sqrt[1];
/* Here we have three Newton-Rhapson iterations each of a
division and a square root and the remainder of the
argument reduction, all interleaved. */
sd = -(sg*sg - sx);
fsgi = (xi0 + 0x40000000) >> 1 & 0x7ff00000;
sy2 = sy + sy;
sg = sy*sd + sg; /* 16-bit approximation to sqrt(sx). */
INSERT_WORDS (fsg, fsgi, 0);
e = -(sy*sg - almost_half);
sd = -(sg*sg - sx);
if ((xi0 & 0x7ff00000) == 0)
goto denorm;
sy = sy + e*sy2;
sg = sg + sy*sd; /* 32-bit approximation to sqrt(sx). */
sy2 = sy + sy;
e = -(sy*sg - almost_half);
sd = -(sg*sg - sx);
sy = sy + e*sy2;
shx = sx * fsg;
sg = sg + sy*sd; /* 64-bit approximation to sqrt(sx),
but perhaps rounded incorrectly. */
sy2 = sy + sy;
g = sg * fsg;
e = -(sy*sg - almost_half);
d = -(g*sg - shx);
sy = sy + e*sy2;
fesetenv_register (fe);
return g + sy*d;
denorm:
/* For denormalised numbers, we normalise, calculate the
square root, and return an adjusted result. */
fesetenv_register (fe);
return __sqrt(x * two108) * twom54;
}
}
else if (x < 0)
{
#ifdef FE_INVALID_SQRT
feraiseexcept (FE_INVALID_SQRT);
/* For some reason, some PowerPC processors don't implement
FE_INVALID_SQRT. I guess no-one ever thought they'd be
used for square roots... :-) */
if (!fetestexcept (FE_INVALID))
#endif
feraiseexcept (FE_INVALID);
#ifndef _IEEE_LIBM
if (_LIB_VERSION != _IEEE_)
x = __kernel_standard(x,x,26);
else else
return z;
#endif #endif
x = a_nan.value;
}
return f_wash(x);
} }
weak_alias (__sqrt, sqrt) weak_alias (__sqrt, sqrt)
/* Strictly, this is wrong, but the only places where _ieee754_sqrt is
used will not pass in a negative result. */
strong_alias(__sqrt,__ieee754_sqrt)
#ifdef NO_LONG_DOUBLE #ifdef NO_LONG_DOUBLE
weak_alias (__sqrt, __sqrtl) strong_alias (__sqrt, __sqrtl) weak_alias (__sqrt, sqrtl)
weak_alias (__sqrt, sqrtl)
#endif #endif

View File

@ -1,5 +1,5 @@
/* Single-precision floating point square root. /* Single-precision floating point square root wrapper.
Copyright (C) 1997, 2003 Free Software Foundation, Inc. Copyright (C) 2004 Free Software Foundation, Inc.
This file is part of the GNU C Library. This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or The GNU C Library is free software; you can redistribute it and/or
@ -17,120 +17,38 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */ 02111-1307 USA. */
#include <math.h> #include "math.h"
#include <math_private.h> #include "math_private.h"
#include <fenv_libc.h> #include <fenv_libc.h>
#include <inttypes.h>
static const float almost_half = 0.50000006; /* 0.5 + 2^-24 */ #include <sysdep.h>
static const ieee_float_shape_type a_nan = { .word = 0x7fc00000 }; #include <ldsodefs.h>
static const ieee_float_shape_type a_inf = { .word = 0x7f800000 }; #include <dl-procinfo.h>
static const float two48 = 281474976710656.0;
static const float twom24 = 5.9604644775390625e-8;
extern const float __t_sqrt[1024];
/* The method is based on a description in #ifdef __STDC__
Computation of elementary functions on the IBM RISC System/6000 processor,
P. W. Markstein, IBM J. Res. Develop, 34(1) 1990.
Basically, it consists of two interleaved Newton-Rhapson approximations,
one to find the actual square root, and one to find its reciprocal
without the expense of a division operation. The tricky bit here
is the use of the POWER/PowerPC multiply-add operation to get the
required accuracy with high speed.
The argument reduction works by a combination of table lookup to
obtain the initial guesses, and some careful modification of the
generated guesses (which mostly runs on the integer unit, while the
Newton-Rhapson is running on the FPU). */
float float
__sqrtf(float x) __sqrtf (float x) /* wrapper sqrtf */
#else
float
__sqrtf (x) /* wrapper sqrtf */
float x;
#endif
{ {
const float inf = a_inf.value; #ifdef _IEEE_LIBM
/* x = f_washf(x); *//* This ensures only one exception for SNaN. */ return __ieee754_sqrtf (x);
if (x > 0) #else
{ float z;
if (x != inf) z = __ieee754_sqrtf (x);
{
/* Variables named starting with 's' exist in the
argument-reduced space, so that 2 > sx >= 0.5,
1.41... > sg >= 0.70.., 0.70.. >= sy > 0.35... .
Variables named ending with 'i' are integer versions of
floating-point values. */
float sx; /* The value of which we're trying to find the square
root. */
float sg,g; /* Guess of the square root of x. */
float sd,d; /* Difference between the square of the guess and x. */
float sy; /* Estimate of 1/2g (overestimated by 1ulp). */
float sy2; /* 2*sy */
float e; /* Difference between y*g and 1/2 (note that e==se). */
float shx; /* == sx * fsg */
float fsg; /* sg*fsg == g. */
fenv_t fe; /* Saved floating-point environment (stores rounding
mode and whether the inexact exception is
enabled). */
uint32_t xi, sxi, fsgi;
const float *t_sqrt;
GET_FLOAT_WORD (xi, x); if (_LIB_VERSION == _IEEE_ || (x != x))
fe = fegetenv_register (); return z;
relax_fenv_state ();
sxi = (xi & 0x3fffffff) | 0x3f000000;
SET_FLOAT_WORD (sx, sxi);
t_sqrt = __t_sqrt + (xi >> (23-8-1) & 0x3fe);
sg = t_sqrt[0];
sy = t_sqrt[1];
/* Here we have three Newton-Rhapson iterations each of a if (x < (float) 0.0)
division and a square root and the remainder of the /* sqrtf(negative) */
argument reduction, all interleaved. */ return (float) __kernel_standard ((double) x, (double) x, 126);
sd = -(sg*sg - sx);
fsgi = (xi + 0x40000000) >> 1 & 0x7f800000;
sy2 = sy + sy;
sg = sy*sd + sg; /* 16-bit approximation to sqrt(sx). */
e = -(sy*sg - almost_half);
SET_FLOAT_WORD (fsg, fsgi);
sd = -(sg*sg - sx);
sy = sy + e*sy2;
if ((xi & 0x7f800000) == 0)
goto denorm;
shx = sx * fsg;
sg = sg + sy*sd; /* 32-bit approximation to sqrt(sx),
but perhaps rounded incorrectly. */
sy2 = sy + sy;
g = sg * fsg;
e = -(sy*sg - almost_half);
d = -(g*sg - shx);
sy = sy + e*sy2;
fesetenv_register (fe);
return g + sy*d;
denorm:
/* For denormalised numbers, we normalise, calculate the
square root, and return an adjusted result. */
fesetenv_register (fe);
return __sqrtf(x * two48) * twom24;
}
}
else if (x < 0)
{
#ifdef FE_INVALID_SQRT
feraiseexcept (FE_INVALID_SQRT);
/* For some reason, some PowerPC processors don't implement
FE_INVALID_SQRT. I guess no-one ever thought they'd be
used for square roots... :-) */
if (!fetestexcept (FE_INVALID))
#endif
feraiseexcept (FE_INVALID);
#ifndef _IEEE_LIBM
if (_LIB_VERSION != _IEEE_)
x = __kernel_standard(x,x,126);
else else
return z;
#endif #endif
x = a_nan.value;
}
return f_washf(x);
} }
weak_alias (__sqrtf, sqrtf) weak_alias (__sqrtf, sqrtf)
/* Strictly, this is wrong, but the only places where __ieee754_sqrtf is
used will not pass in a negative argument. */
strong_alias(__sqrtf,__ieee754_sqrtf)

View File

@ -17,10 +17,10 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */ 02111-1307 USA. */
#ifdef __ASSEMBLER__
#include <sysdeps/powerpc/sysdep.h> #include <sysdeps/powerpc/sysdep.h>
#ifdef __ASSEMBLER__
#ifdef __ELF__ #ifdef __ELF__
/* If compiled for profiling, call `_mcount' at the start of each /* If compiled for profiling, call `_mcount' at the start of each

View File

@ -16,6 +16,20 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */ 02111-1307 USA. */
/*
* Powerpc Feature masks for the Aux Vector Hardware Capabilities (AT_HWCAP).
* This entry is copied to _dl_hwcap or rtld_global._dl_hwcap during startup.
* The following must match the kernel's linux/asm/cputable.h.
*/
#define PPC_FEATURE_32 0x80000000 /* 32-bit mode. */
#define PPC_FEATURE_64 0x40000000 /* 64-bit mode. */
#define PPC_FEATURE_601_INSTR 0x20000000 /* 601 chip, Old POWER ISA. */
#define PPC_FEATURE_HAS_ALTIVEC 0x10000000 /* SIMD/Vector Unit. */
#define PPC_FEATURE_HAS_FPU 0x08000000 /* Floating Point Unit. */
#define PPC_FEATURE_HAS_MMU 0x04000000 /* Memory Management Unit. */
#define PPC_FEATURE_HAS_4xxMAC 0x02000000 /* 4xx Multiply Accumulator. */
#define PPC_FEATURE_UNIFIED_CACHE 0x01000000 /* Unified I/D cache. */
#ifdef __ASSEMBLER__ #ifdef __ASSEMBLER__
/* Symbolic names for the registers. The only portable way to write asm /* Symbolic names for the registers. The only portable way to write asm
@ -146,19 +160,5 @@
#define ASM_SIZE_DIRECTIVE(name) .size name,.-name #define ASM_SIZE_DIRECTIVE(name) .size name,.-name
#endif /* __ELF__ */ #endif /* __ELF__ */
/*
* Powerpc Feature masks for the Aux Vector Hardware Capabilities (AT_HWCAP).
* This entry is copied to _dl_hwcap or rtld_global._dl_hwcap during startup.
* The following must match the kernel's linux/asm/cputable.h.
*/
#define PPC_FEATURE_32 0x80000000 /* 32-bit mode. */
#define PPC_FEATURE_64 0x40000000 /* 64-bit mode. */
#define PPC_FEATURE_601_INSTR 0x20000000 /* 601 chip, Old POWER ISA. */
#define PPC_FEATURE_HAS_ALTIVEC 0x10000000 /* SIMD/Vector Unit. */
#define PPC_FEATURE_HAS_FPU 0x08000000 /* Floating Point Unit. */
#define PPC_FEATURE_HAS_MMU 0x04000000 /* Memory Management Unit. */
#define PPC_FEATURE_HAS_4xxMAC 0x02000000 /* 4xx Multiply Accumulator. */
#define PPC_FEATURE_UNIFIED_CACHE 0x01000000 /* Unified I/D cache. */
#endif /* __ASSEMBLER__ */ #endif /* __ASSEMBLER__ */

View File

@ -27,8 +27,27 @@
#include <unistd.h> #include <unistd.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/time.h> #include <sys/time.h>
#include <aio_misc.h>
#include "aio_misc.h" #ifndef aio_create_helper_thread
# define aio_create_helper_thread __aio_create_helper_thread
/* Fallback helper-thread launcher, used when aio_create_helper_thread
   has not already been defined as a macro.  Starts TF (ARG) in a new
   detached thread and stores the thread ID in *THREADP.  Returns 0 on
   success, otherwise the error number reported by pthread_attr_init
   or pthread_create.  */
extern inline int
__aio_create_helper_thread (pthread_t *threadp, void *(*tf) (void *), void *arg)
{
  pthread_attr_t attr;

  /* Never hand an uninitialized attribute object to pthread_create.  */
  int ret = pthread_attr_init (&attr);
  if (ret != 0)
    return ret;

  /* Make sure the thread is created detached.  */
  pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);

  ret = pthread_create (threadp, &attr, tf, arg);

  /* The attribute object is no longer needed whether or not the
     thread was started.  */
  (void) pthread_attr_destroy (&attr);
  return ret;
}
#endif
static void add_request_to_runlist (struct requestlist *newrequest); static void add_request_to_runlist (struct requestlist *newrequest);
@ -400,16 +419,11 @@ __aio_enqueue_request (aiocb_union *aiocbp, int operation)
if (nthreads < optim.aio_threads && idle_thread_count == 0) if (nthreads < optim.aio_threads && idle_thread_count == 0)
{ {
pthread_t thid; pthread_t thid;
pthread_attr_t attr;
/* Make sure the thread is created detached. */
pthread_attr_init (&attr);
pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
running = newp->running = allocated; running = newp->running = allocated;
/* Now try to start a thread. */ /* Now try to start a thread. */
if (pthread_create (&thid, &attr, handle_fildes_io, newp) == 0) if (aio_create_helper_thread (&thid, handle_fildes_io, newp) == 0)
/* We managed to enqueue the request. All errors which can /* We managed to enqueue the request. All errors which can
happen now can be recognized by calls to `aio_return' and happen now can be recognized by calls to `aio_return' and
`aio_error'. */ `aio_error'. */

View File

@ -1,5 +1,6 @@
/* Notify initiator of AIO request. /* Notify initiator of AIO request.
Copyright (C) 1997,98,99,2000,2001,2003 Free Software Foundation, Inc. Copyright (C) 1997, 1998, 1999, 2000, 2001, 2003, 2004
Free Software Foundation, Inc.
This file is part of the GNU C Library. This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
@ -22,8 +23,11 @@
#include <pthread.h> #include <pthread.h>
#include <stdlib.h> #include <stdlib.h>
#include <unistd.h> #include <unistd.h>
#include "aio_misc.h" #include <aio_misc.h>
#ifndef aio_start_notify_thread
# define aio_start_notify_thread() do { } while (0)
#endif
struct notify_func struct notify_func
{ {
@ -34,6 +38,7 @@ struct notify_func
static void * static void *
notify_func_wrapper (void *arg) notify_func_wrapper (void *arg)
{ {
aio_start_notify_thread ();
struct notify_func *const n = arg; struct notify_func *const n = arg;
void (*func) (sigval_t) = n->func; void (*func) (sigval_t) = n->func;
sigval_t value = n->value; sigval_t value = n->value;