update from main archive 961105

2024-11-10 15:20:10 +00:00 · 1996-11-06 04:24:11 +00:00 · 1996-11-06 04:24:11 +00:00 · 60c74cf07a
commit 60c74cf07a
parent cbb7824d08
38 changed files with 1448 additions and 232 deletions
--- a/sysdeps/alpha/Makefile
+++ b/sysdeps/alpha/Makefile
@ -42,4 +42,4 @@ ifeq ($(subdir),elf)
 sysdep-CFLAGS += -mno-fp-regs
 endif
-divrem := divl divlu divq divqu reml remlu remq remqu
+divrem := divl divq reml remq
--- a/sysdeps/alpha/bsd-_setjmp.S
+++ b/sysdeps/alpha/bsd-_setjmp.S
@ -25,6 +25,10 @@ Cambridge, MA 02139, USA.  */
 ENTRY(_setjmp)
 	ldgp	$29,0($27)
 #ifdef PROF
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount
 #endif
 	.prologue 1
 	bis	$31, $31, $17		/* Pass a second argument of zero.  */
 	jmp	$31, __sigsetjmp	/* Call __sigsetjmp.  */
--- a/sysdeps/alpha/bsd-setjmp.S
+++ b/sysdeps/alpha/bsd-setjmp.S
@ -25,6 +25,10 @@ Cambridge, MA 02139, USA.  */
 ENTRY(setjmp)
 	ldgp	$29, 0($27)
 #ifdef PROF
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount
 #endif
 	.prologue 1
 	bis	$31, 1, $17		/* Pass a second argument of one.  */
 	jmp	$31, __sigsetjmp	/* Call __sigsetjmp.  */
--- a/sysdeps/alpha/bzero.S
+++ b/sysdeps/alpha/bzero.S
@ -80,7 +80,14 @@ $tail:	bne	t4, 1f		# is there a tail to do?
 	.end bzero_loop
 ENTRY(bzero)
 #ifdef PROF
 	ldgp	gp, 0(pv)
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount
 	.prologue 1
 #else
 	.prologue 0
 #endif
 	mov	a0, v0		# e0    : move return value in place
 	beq	a1, $done	# .. e1 : early exit for zero-length store
--- a/sysdeps/alpha/div.S
+++ b/sysdeps/alpha/div.S
@ -0,0 +1,110 @@
 /* Copyright (C) 1996 Free Software Foundation, Inc.
   Contributed by Richard Henderson (rth@tamu.edu)
   This file is part of the GNU C Library.
   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.
   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.
   You should have received a copy of the GNU Library General Public
   License along with the GNU C Library; see the file COPYING.LIB.  If
   not, write to the Free Software Foundation, Inc., 675 Mass Ave,
   Cambridge, MA 02139, USA.  */
 #include <sysdep.h>
 #ifdef __linux__
 # include <asm/gentrap.h>
 # include <asm/pal.h>
 #else
 # include <machine/pal.h>
 #endif
 	.set noat
 	.align 4
 	.globl div
 	.ent div
 	/* div: signed 32-bit division producing quotient and remainder.
 	   In:   a0 = address of the two-word result structure
 	         a1 = numerator, a2 = denominator (low 32 bits are used)
 	   Out:  v0 = a0; quotient stored at 0(a0), remainder at 4(a0)
 	   Uses: t1-t7 as scratch (and AT when built with PROF).
 	   A zero denominator raises GEN_INTDIV through PAL_gentrap; if the
 	   trap returns, both result fields are stored as zero.  */
 div:
 	.frame sp, 0, ra
 #ifdef PROF
 	ldgp	gp, 0(pv)
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount
 	.prologue 1
 #else
 	.prologue 0
 #endif
 #define divisor   t1
 #define mask      t2
 #define quotient  t3
 #define modulus   t4
 #define tmp1      t5
 #define tmp2      t6
 #define compare   t7
 	/* find correct sign for input to unsigned divide loop.  The running
 	   remainder (modulus) starts out as the absolute value of the
 	   numerator; quotient and mask must start at 0 and 1 because the
 	   loops below only ever accumulate into / shift them.  */
 	sextl	a1, a1				# e0    :
 	sextl	a2, a2				# .. e1 :
 	negl	a1, modulus			# e0    :
 	negl	a2, divisor			# .. e1 :
 	clr	quotient			# e0    :
 	ldiq	mask, 1				# .. e1 :
 	cmovge	a1, a1, modulus			# e0    :
 	cmovge	a2, a2, divisor			# .. e1 :
 	/* negl of the most negative 32-bit value yields the same value,
 	   sign-extended to 64 bits.  Zero-extend both magnitudes so the
 	   unsigned loops below really do operate on 32-bit quantities and
 	   the 3-bit shifts cannot run off the top of the register.  */
 	zapnot	modulus, 15, modulus		# e0    :
 	zapnot	divisor, 15, divisor		# .. e1 :
 	beq	a2, $divbyzero			# e1    :
 	unop					#       :
 	/* shift divisor left, using 3-bit shifts for 32-bit divides as we
 	   can't overflow.  Three-bit shifts will result in looping three
 	   times less here, but can result in two loops more later.  Thus
 	   using a large shift isn't worth it (and s8addq pairs better than
 	   a shift).  */
 1:	cmpult	divisor, modulus, compare	# e0    :
 	s8addq	divisor, zero, divisor		# .. e1 :
 	s8addq	mask, zero, mask		# e0    :
 	bne	compare, 1b			# .. e1 :
 	/* start to go right again.  Each pass tests one quotient bit:
 	   if divisor <= modulus, subtract it and set the bit held in mask. */
 2:	addq	quotient, mask, tmp2		# e1    :
 	srl	mask, 1, mask			# .. e0 :
 	cmpule	divisor, modulus, compare	# e0    :
 	subq	modulus, divisor, tmp1		# .. e1 :
 	cmovne	compare, tmp2, quotient		# e1    :
 	srl	divisor, 1, divisor		# .. e0 :
 	cmovne	compare, tmp1, modulus		# e0    :
 	bne	mask, 2b			# .. e1 :
 	/* find correct sign for result: the quotient is negative when the
 	   operand signs differ, the remainder takes the numerator's sign.  */
 	xor	a1, a2, compare			# e0    :
 	negl	quotient, tmp1			# .. e1 :
 	negl	modulus, tmp2			# e0    :
 	cmovlt	compare, tmp1, quotient		# .. e1 :
 	cmovlt	a1, tmp2, modulus		# e1    :
 	/* and store it away in the structure.  */
 	stl	quotient, 0(a0)			# .. e0 :
 	mov	a0, v0				# e1    :
 	stl	modulus, 4(a0)			# .. e0 :
 	ret					# e1    :
 $divbyzero:
 	/* a0 is needed for the trap code, so park the result pointer in v0
 	   (which is also where it has to be on return).  */
 	mov	a0, v0
 	ldiq	a0, GEN_INTDIV
 	call_pal PAL_gentrap
 	/* if trap returns, return zero.  */
 	stl	zero, 0(v0)
 	stl	zero, 4(v0)
 	ret
 	.end div
--- a/sysdeps/alpha/divl.S
+++ b/sysdeps/alpha/divl.S
@ -1,6 +1,6 @@
 #define IS_REM		0
 #define SIZE		4
-#define SIGNED		1
+#define UFUNC_NAME	__divlu
-#define FUNC_NAME	__divl
+#define SFUNC_NAME	__divl
 #include "divrem.h"
--- a/sysdeps/alpha/divlu.S
+++ b/sysdeps/alpha/divlu.S
@ -1,6 +0,0 @@
 #define IS_REM		0
 #define SIZE		4
 #define SIGNED		0
 #define FUNC_NAME	__divlu
 #include "divrem.h"
--- a/sysdeps/alpha/divq.S
+++ b/sysdeps/alpha/divq.S
@ -1,6 +1,6 @@
 #define IS_REM		0
 #define SIZE		8
-#define SIGNED		1
+#define UFUNC_NAME	__divqu
-#define FUNC_NAME	__divq
+#define SFUNC_NAME	__divq
 #include "divrem.h"
--- a/sysdeps/alpha/divqu.S
+++ b/sysdeps/alpha/divqu.S
@ -1,6 +0,0 @@
 #define IS_REM		0
 #define SIZE		8
 #define SIGNED		0
 #define FUNC_NAME	__divqu
 #include "divrem.h"
--- a/sysdeps/alpha/divrem.h
+++ b/sysdeps/alpha/divrem.h
@ -1,25 +1,25 @@
 /* Copyright (C) 1996 Free Software Foundation, Inc.
   Contributed by David Mosberger (davidm@cs.arizona.edu).
-This file is part of the GNU C Library.
+   This file is part of the GNU C Library.
-The GNU C Library is free software; you can redistribute it and/or
+   The GNU C Library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Library General Public License as
+   modify it under the terms of the GNU Library General Public License as
-published by the Free Software Foundation; either version 2 of the
+   published by the Free Software Foundation; either version 2 of the
-License, or (at your option) any later version.
+   License, or (at your option) any later version.
-The GNU C Library is distributed in the hope that it will be useful,
+   The GNU C Library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-Library General Public License for more details.
+   Library General Public License for more details.
-You should have received a copy of the GNU Library General Public
+   You should have received a copy of the GNU Library General Public
-License along with the GNU C Library; see the file COPYING.LIB.  If
+   License along with the GNU C Library; see the file COPYING.LIB.  If
-not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+   not, write to the Free Software Foundation, Inc., 675 Mass Ave,
-Cambridge, MA 02139, USA.  */
+   Cambridge, MA 02139, USA.  */
 /* The current Alpha chips don't provide hardware for integer
-division.  The C compiler expects the functions
+   division.  The C compiler expects the functions
 	__divqu: 64-bit unsigned long divide
 	__remqu: 64-bit unsigned long remainder
@ -27,10 +27,10 @@ division.  The C compiler expects the functions
 	__divlu/__remlu: unsigned 32-bit
 	__divls/__remls: signed 32-bit
-These are not normal C functions: instead of the normal calling
+   These are not normal C functions: instead of the normal calling
-sequence, these expect their arguments in registers t10 and t11, and
+   sequence, these expect their arguments in registers t10 and t11, and
-return the result in t12 (aka pv). Register AT may be clobbered
+   return the result in t12 (aka pv).  Register AT may be clobbered
-(assembly temporary), anything else must be saved.  */
+   (assembly temporary), anything else must be saved.  */
 #include <sysdep.h>
@ -41,77 +41,144 @@ return the result in t12 (aka pv). Register AT may be clobbered
 # include <machine/pal.h>
 #endif
-#ifdef DEBUG
+#define mask			v0
-# define arg1		a0
+#define divisor			t0
-# define arg2		a1
+#define compare			AT
-# define result		v0
+#define tmp1			t2
-# define mask		t0
+#define tmp2			t3
-# define tmp0		t1
+#define retaddr			t9
-# define tmp1		t2
+#define arg1			t10
-# define sign		t3
+#define arg2			t11
-# define retaddr	ra
+#define result			t12
 #else
 # define arg1		t10
 # define arg2		t11
 # define result		t12
 # define mask		v0
 # define tmp0		t0
 # define tmp1		t1
 # define sign		t2
 # define retaddr	t9
 #endif
 # define divisor	arg2
 #if IS_REM
-# define dividend	result
+# define DIV_ONLY(x,y...)
-# define quotient	arg1
+# define REM_ONLY(x,y...)	x,##y
-# define GETDIVIDEND	bis arg1,zero,dividend
+# define modulus		result
 # define quotient		t1
 # define GETSIGN(x)		mov arg1, x
 # define STACK			32
 #else
-# define dividend	arg1
+# define DIV_ONLY(x,y...)	x,##y
-# define quotient	result
+# define REM_ONLY(x,y...)
-# define GETDIVIDEND
+# define modulus		t1
 # define quotient		result
 # define GETSIGN(x)		xor arg1, arg2, x
 # define STACK			48
 #endif
 #if SIZE == 8
-# define LONGIFYarg1	GETDIVIDEND
+# define LONGIFY(x,y)		mov x,y
-# define LONGIFYarg2
+# define SLONGIFY(x,y)		mov x,y
 # define _SLONGIFY(x)
 # define NEG(x,y)		negq x,y
 #else
-# if SIGNED
+# define LONGIFY(x,y)		zapnot x,15,y
-#  define LONGIFYarg1	addl	arg1,zero,dividend
+# define SLONGIFY(x,y)		sextl x,y
-#  define LONGIFYarg2	addl	arg2,zero,divisor
+# define _SLONGIFY(x)		sextl x,x
-# else
+# define NEG(x,y)		negl x,y
 #  define LONGIFYarg1	zapnot	arg1,0x0f,dividend
 #  define LONGIFYarg2	zapnot	arg2,0x0f,divisor
 # endif
 #endif
 #if SIGNED
 # define SETSIGN(sign,reg,tmp)	subq zero,reg,tmp; cmovlt sign,tmp,reg
 # if IS_REM
 #  define GETSIGN(x,y,s)	bis	x,zero,s
 # else
 #  define GETSIGN(x,y,s)	xor	x,y,s
 # endif
 #else
 # define SETSIGN(sign,reg,tmp)
 # define GETSIGN(x,y,s)
 #endif
 	.set noreorder
 	.set noat
-	.ent FUNC_NAME
+	.ent UFUNC_NAME
-	.globl FUNC_NAME
+	.globl UFUNC_NAME
-#define FRAME_SIZE	0x30
+	.align 3
-
+UFUNC_NAME:
-	.align 5
+	lda	sp, -STACK(sp)
-FUNC_NAME:
+	.frame	sp, STACK, retaddr, 0
 #ifdef PROF
-	lda	sp, -0x18(sp)
+	stq	ra, 0(sp)
-	stq	ra, 0x00(sp)
+	stq	pv, 8(sp)
-	stq	pv, 0x08(sp)
+	stq	gp, 16(sp)
-	stq	gp, 0x10(sp)
+
 	br	AT, 1f
 1:	ldgp	gp, 0(AT)
 	mov	retaddr, ra
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount
 	ldq	ra, 0(sp)
 	ldq	pv, 8(sp)
 	ldq	gp, 16(sp)
 #endif
 	.prologue 0
 $udiv:
 	stq	t0, 0(sp)
 	LONGIFY	(arg2, divisor)
 	stq	t1, 8(sp)
 	LONGIFY	(arg1, modulus)
 	stq	v0, 16(sp)
 	clr	quotient
 	stq	tmp1, 24(sp)
 	ldiq	mask, 1
 	DIV_ONLY(stq tmp2,32(sp))
 	beq	divisor, $divbyzero
 	.align 3
 #if SIZE == 8
 	/* Shift divisor left.  */
 1:	cmpult	divisor, modulus, compare
 	blt	divisor, 2f
 	addq	divisor, divisor, divisor
 	addq	mask, mask, mask
 	bne	compare, 1b
 	unop
 2:
 #else
 	/* Shift divisor left using 3-bit shifts as we can't overflow.
 	   This results in looping three times less here, but up to
 	   two more times later.  Thus using a large shift isn't worth it.  */
 1:	cmpult	divisor, modulus, compare
 	s8addq	divisor, zero, divisor
 	s8addq	mask, zero, mask
 	bne	compare, 1b
 #endif
 	/* Now go back to the right.  */
 3:	DIV_ONLY(addq quotient, mask, tmp2)
 	srl	mask, 1, mask
 	cmpule	divisor, modulus, compare
 	subq	modulus, divisor, tmp1
 	DIV_ONLY(cmovne compare, tmp2, quotient)
 	srl	divisor, 1, divisor
 	cmovne	compare, tmp1, modulus
 	bne	mask, 3b
 $done:	ldq	t0, 0(sp)
 	ldq	t1, 8(sp)
 	ldq	v0, 16(sp)
 	ldq	tmp1, 24(sp)
 	DIV_ONLY(ldq tmp2, 32(sp))
 	lda	sp, STACK(sp)
 	ret	zero, (retaddr), 1
 $divbyzero:
 	mov	a0, tmp1
 	ldiq	a0, GEN_INTDIV
 	call_pal PAL_gentrap
 	mov	tmp1, a0
 	clr	result			/* If trap returns, return zero.  */
 	br	$done
 	.end UFUNC_NAME
 	.ent SFUNC_NAME
 	.globl SFUNC_NAME
 	.align 3
 SFUNC_NAME:
 	lda	sp, -STACK(sp)
 	.frame	sp, STACK, retaddr, 0
 #ifdef PROF
 	stq	ra, 0(sp)
 	stq	pv, 8(sp)
 	stq	gp, 16(sp)
 	br	AT, 1f
 1:	ldgp	gp, 0(AT)
@ -119,69 +186,40 @@ FUNC_NAME:
 	mov	retaddr, ra
 	jsr	AT, _mcount
-	ldq	ra, 0x00(sp)
+	ldq	ra, 0(sp)
-	ldq	pv, 0x08(sp)
+	ldq	pv, 8(sp)
-	ldq	gp, 0x10(sp)
+	ldq	gp, 16(sp)
 	lda	sp, 0x18(sp)
 #endif
-	.frame	sp, FRAME_SIZE, retaddr, 0
+	.prologue 0
 	lda	sp,-FRAME_SIZE(sp)
 	.prologue 1
 	stq	arg1,0x00(sp)
 	LONGIFYarg1
 	stq	arg2,0x08(sp)
 	LONGIFYarg2
 	stq	mask,0x10(sp)
 	bis	zero,1,mask
 	stq	tmp0,0x18(sp)
 	bis	zero,zero,quotient
 	stq	tmp1,0x20(sp)
 	beq	divisor,$divbyzero
 	stq	sign,0x28(sp)
 	GETSIGN(dividend,divisor,sign)
 #if SIGNED
 	subq	zero,dividend,tmp0
 	subq	zero,divisor,tmp1
 	cmovlt	dividend,tmp0,dividend
 	cmovlt	divisor,tmp1,divisor
 #endif
 	/*
 	 * Shift divisor left until either bit 63 is set or until it
 	 * is at least as big as the dividend:
 	 */
 	.align	3
 1:	cmpule	dividend,divisor,AT
 	blt	divisor,2f
 	blbs	AT,2f
 	addq	mask,mask,mask
 	addq	divisor,divisor,divisor
 	br	1b
-	.align	3
+	or	arg1, arg2, AT
-2:	addq	mask,quotient,tmp0
+	_SLONGIFY(AT)
-	cmpule	divisor,dividend,AT
+	bge	AT, $udiv		/* don't need to mess with signs */
 	subq	dividend,divisor,tmp1
 	srl	divisor,1,divisor
 	srl	mask,1,mask
 	cmovlbs	AT,tmp0,quotient
 	cmovlbs	AT,tmp1,dividend
 	bne	mask,2b
-	ldq	arg1,0x00(sp)
+	/* Save originals and find absolute values.  */
-	SETSIGN(sign,result,tmp0)
+	stq	arg1, 0(sp)
-$done:	ldq	arg2,0x08(sp)
+	NEG	(arg1, AT)
-	ldq	mask,0x10(sp)
+	stq	arg2, 8(sp)
-	ldq	tmp0,0x18(sp)
+	cmovge	AT, AT, arg1
-	ldq	tmp1,0x20(sp)
+	stq	retaddr, 16(sp)
-	ldq	sign,0x28(sp)
+	NEG	(arg2, AT)
-	lda	sp,FRAME_SIZE(sp)
+	stq	tmp1, 24(sp)
-	ret	zero,(retaddr),0
+	cmovge	AT, AT, arg2
-$divbyzero:
+	/* Do the unsigned division.  */
-	lda	a0,GEN_INTDIV(zero)
+	bsr	retaddr, UFUNC_NAME
 	call_pal PAL_gentrap
 	bis	zero,zero,result	/* if trap returns, return 0 */
 	ldq	arg1,0x00(sp)
 	br	$done
-	END(FUNC_NAME)
+	/* Restore originals and adjust the sign of the result.  */
 	ldq	arg1, 0(sp)
 	ldq	arg2, 8(sp)
 	GETSIGN	(AT)
 	NEG	(result, tmp1)
 	_SLONGIFY(AT)
 	ldq	retaddr, 16(sp)
 	cmovlt	AT, tmp1, result
 	ldq	tmp1, 24(sp)
 	lda	sp, STACK(sp)
 	ret	zero, (retaddr), 1
 	.end	SFUNC_NAME
--- a/sysdeps/alpha/ffs.S
+++ b/sysdeps/alpha/ffs.S
@ -27,7 +27,14 @@ architecture.  */
        .set noat
 ENTRY(ffs)
 #ifdef PROF
 	ldgp	gp, 0(pv)
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount
 	.prologue 1
 #else
 	.prologue 0
 #endif
 	ldq_u	zero, 0(sp)	# on the 21064, this helps dual-issuing
 	addl	a0, zero, a0	# the last insn and reduces the stall
--- a/sysdeps/alpha/htonl.S
+++ b/sysdeps/alpha/htonl.S
@ -19,7 +19,15 @@ Cambridge, MA 02139, USA.  */
 #include <sysdep.h>
 ENTRY(__htonl)
 #ifdef PROF
 	ldgp	gp, 0(pv)
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount
 	.prologue 1
 #else
 	.prologue 0
 #endif
 	extlh	a0, 5, t1	# t1 = dd000000
 	zap	a0, 0xfd, t2	# t2 = 0000cc00
 	sll	t2, 5, t2	# t2 = 00198000
--- a/sysdeps/alpha/htons.S
+++ b/sysdeps/alpha/htons.S
@ -19,7 +19,15 @@ Cambridge, MA 02139, USA.  */
 #include <sysdep.h>
 ENTRY(__htons)
 #ifdef PROF
 	ldgp	gp, 0(pv)
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount
 	.prologue 1
 #else
 	.prologue 0
 #endif
 	extwh	a0, 7, t1	# t1 = bb00
 	extbl	a0, 1, v0	# v0 = 00aa
 	bis	v0, t1, v0	# v0 = bbaa
--- a/sysdeps/alpha/ldiv.S
+++ b/sysdeps/alpha/ldiv.S
@ -0,0 +1,109 @@
 /* Copyright (C) 1996 Free Software Foundation, Inc.
   Contributed by Richard Henderson (rth@tamu.edu)
   This file is part of the GNU C Library.
   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.
   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.
   You should have received a copy of the GNU Library General Public
   License along with the GNU C Library; see the file COPYING.LIB.  If
   not, write to the Free Software Foundation, Inc., 675 Mass Ave,
   Cambridge, MA 02139, USA.  */
 #include <sysdep.h>
 #ifdef __linux__
 # include <asm/gentrap.h>
 # include <asm/pal.h>
 #else
 # include <machine/pal.h>
 #endif
 	.set noat
 	.align 4
 	.globl ldiv
 	.ent ldiv
 	/* ldiv: signed 64-bit division producing quotient and remainder.
 	   In:   a0 = address of the two-quadword result structure
 	         a1 = numerator, a2 = denominator
 	   Out:  v0 = a0; quotient stored at 0(a0), remainder at 8(a0)
 	   Uses: t1-t7 as scratch (and AT when built with PROF).
 	   A zero denominator raises GEN_INTDIV through PAL_gentrap; if the
 	   trap returns, both result fields are stored as zero.  */
 ldiv:
 	.frame sp, 0, ra
 #ifdef PROF
 	ldgp	gp, 0(pv)
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount
 	.prologue 1
 #else
 	.prologue 0
 #endif
 #define divisor   t1
 #define mask      t2
 #define quotient  t3
 #define modulus   t4
 #define tmp1      t5
 #define tmp2      t6
 #define compare   t7
 	/* find correct sign for input to unsigned divide loop.  The running
 	   remainder (modulus) starts out as the absolute value of the
 	   numerator; quotient and mask must start at 0 and 1 because the
 	   loops below only ever accumulate into / shift them.  */
 	mov	a1, modulus			# e0    :
 	mov	a2, divisor			# .. e1 :
 	negq	a1, tmp1			# e0    :
 	negq	a2, tmp2			# .. e1 :
 	clr	quotient			# e0    :
 	ldiq	mask, 1				# .. e1 :
 	cmovlt	a1, tmp1, modulus		# e0    :
 	cmovlt	a2, tmp2, divisor		# .. e1 :
 	beq	a2, $divbyzero			# e1    :
 	unop					#       :
 	/* shift divisor left.  Stop early once the top bit of divisor is
 	   set, since another doubling would lose it.  */
 1:	cmpult	divisor, modulus, compare	# e0    :
 	blt	divisor, 2f			# .. e1 :
 	addq	divisor, divisor, divisor	# e0    :
 	addq	mask, mask, mask		# .. e1 :
 	bne	compare, 1b			# e1    :
 	unop					#       :
 	/* start to go right again.  Each pass tests one quotient bit:
 	   if divisor <= modulus, subtract it and set the bit held in mask. */
 2:	addq	quotient, mask, tmp2		# e1    :
 	srl	mask, 1, mask			# .. e0 :
 	cmpule	divisor, modulus, compare	# e0    :
 	subq	modulus, divisor, tmp1		# .. e1 :
 	cmovne	compare, tmp2, quotient		# e1    :
 	srl	divisor, 1, divisor		# .. e0 :
 	cmovne	compare, tmp1, modulus		# e0    :
 	bne	mask, 2b			# .. e1 :
 	/* find correct sign for result: the quotient is negative when the
 	   operand signs differ, the remainder takes the numerator's sign.  */
 	xor	a1, a2, compare			# e0    :
 	negq	quotient, tmp1			# .. e1 :
 	negq	modulus, tmp2			# e0    :
 	cmovlt	compare, tmp1, quotient		# .. e1 :
 	cmovlt	a1, tmp2, modulus		# e1    :
 	/* and store it away in the structure.  */
 9:	stq	quotient, 0(a0)			# .. e0 :
 	mov	a0, v0				# e1    :
 	stq	modulus, 8(a0)			# .. e0 :
 	ret					# e1    :
 $divbyzero:
 	/* a0 is needed for the trap code, so park the result pointer in v0
 	   (which is also where it has to be on return).  */
 	mov	a0, v0
 	lda	a0, GEN_INTDIV
 	call_pal PAL_gentrap
 	/* if trap returns, return zero.  */
 	stq	zero, 0(v0)
 	stq	zero, 8(v0)
 	ret
 	.end ldiv
 weak_alias(ldiv, lldiv)
--- a/sysdeps/alpha/lldiv.S
+++ b/sysdeps/alpha/lldiv.S
@ -0,0 +1 @@
 /* lldiv is the same as ldiv on the Alpha.  */
--- a/sysdeps/alpha/memchr.S
+++ b/sysdeps/alpha/memchr.S
@ -40,7 +40,14 @@ For correctness consider that:
        .set noat
 ENTRY(memchr)
 #ifdef PROF
 	ldgp	gp, 0(pv)
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount
 	.prologue 1
 #else
 	.prologue 0
 #endif
 	beq	a2, $not_found
        ldq_u   t0, 0(a0)       # load first quadword (a0 may be misaligned)
--- a/sysdeps/alpha/memcpy.S
+++ b/sysdeps/alpha/memcpy.S
@ -0,0 +1,276 @@
 /* Copyright (C) 1996 Free Software Foundation, Inc.
   Contributed by Richard Henderson (rth@tamu.edu)
   This file is part of the GNU C Library.
   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.
   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.
   You should have received a copy of the GNU Library General Public
   License along with the GNU C Library; see the file COPYING.LIB.  If
   not, write to the Free Software Foundation, Inc., 675 Mass Ave,
   Cambridge, MA 02139, USA.  */
 /* This is the child of the C-with-inline-assembly memcpy posted by
   Martin Ostermann (ost@comnets.rwth-aachen.de).
   This is generally scheduled for the EV5, but whenever necessary and
   possible, the autoswap slotting feature of the EV5 is used so that the
   code lays out nicely for the EV4 as well.  */
 #include <alpha/regdef.h>
 	.set noreorder
 	.text
 	.ent copy_fwd_aligned
 copy_fwd_aligned:
 	.frame sp, 0, ra, 0
 	.prologue 0
 	/* Forward copy used by memcpy when the source and destination
 	   addresses agree in their low three bits.  memcpy branches to
 	   $fwd_aligned further down, which prepares t0, t2 and a2 and then
 	   continues at $fa_loop or $fa_small.  All paths end in a plain ret
 	   back to memcpy's caller.  */
 	/* Aligned forward copy main loop.  On entry to this basic block:
 	   t0 == source word waiting to be stored
 	   t2 == loop counter
 	   a0 == destination pointer
 	   a1 == source pointer
 	   a2 mod 8 == byte count in final word */
 	.align 4
 $fa_loop:
 	/* First peel off (t2 mod 8) single-word copies so that the count
 	   left for the unrolled loop is a multiple of eight.  */
 	and	t2, 7, t1	# e0    :
 	beq	t1, 1f		# .. e1 :
 0:	stq_u	t0, 0(a0)	# e0    :
 	subq	t1, 1, t1	# .. e1 :
 	ldq_u	t0, 8(a1)	# e0    : copy up to seven words
 	addq	a0, 8, a0	# .. e1 :
 	addq	a1, 8, a1	# e0    :
 	bne	t1, 0b		# .. e1 :
 	/* t2 = remaining word count rounded down to a multiple of eight.  */
 1:	bic	t2, 7, t2	# e0    :
 	beq	t2, $fa_tail	# .. e1 :
 	/* Unrolled by eight: store the pending word, load the next eight
 	   and store seven of them; the eighth stays pending in t0.  */
 2:	stq_u	t0, 0(a0)	# e0    :
 	addq	a0, 64, a0	# .. e1 :
 	ldq_u	t3, 8(a1)	# e0    : copy eight words as fast as we can
 	ldq_u	t4, 16(a1)	# .. e1 :
 	ldq_u	t5, 24(a1)	# e0    :
 	ldq_u	t6, 32(a1)	# .. e1 :
 	ldq_u	t7, 40(a1)	# e0    :
 	ldq_u	t8, 48(a1)	# .. e1 :
 	ldq_u	t9, 56(a1)	# e0    :
 	ldq_u	t0, 64(a1)	# .. e1 :
 	stq_u	t3, -56(a0)	# e0    :
 	subq	t2, 8, t2	# .. e1 :
 	stq_u	t4, -48(a0)	# e0    :
 	addq	a1, 64, a1	# .. e1 :
 	stq_u	t5, -40(a0)	# e0    :
 	stq_u	t6, -32(a0)	# e0    :
 	stq_u	t7, -24(a0)	# e0    :
 	stq_u	t8, -16(a0)	# e0    :
 	stq_u	t9, -8(a0)	# e0    :
 	bne	t2, 2b		# .. e1 :
 	/* Take care of a partial word tail.  */
 $fa_tail:
 	/* t3 = number of bytes used in the final word; zero means the
 	   whole pending word is written as is.  */
 	and	a2, 7, t3	# e0    :
 	bne	t3, 1f		# .. e1 (zdb)
 	/* Aligned copy, aligned tail, final store.  */
 	stq_u	t0, 0(a0)
 	ret
 	/* Partial final word: keep the source bytes below the end offset
 	   and the existing destination bytes from the end offset upward.  */
 1:	ldq_u	t1, 0(a0)	# e1    :
 	mskql	t0, a2, t0	# .. e1 :
 	mskqh	t1, a2, t1	# e0 (stall)
 	bis	t0, t1, t0	# e1    :
 	stq_u	t0, 0(a0)	# e0    :
 	ret			# .. e1 :
 	/* This is the actual entry point to this function.  */
 	.align 3
 $fwd_aligned:
 	/* t3 = offset of dst within its quadword.  a2 is rebased so that it
 	   counts from the start of that quadword, and t2 = (a2 - 1) / 8 is
 	   the number of words stored before the final one.  */
 	ldq_u	t0, 0(a1)	# e0    :
 	and	a0, 7, t3	# .. e1 :
 	addq	a2, t3, a2	# e0    :
 	subq	a2, 1, t2	# e1    :
 	sra	t2, 3, t2	# e0    :
 	beq	t3, $fa_loop	# .. e1 :
 	/* Misaligned head: replace the bytes of the first source word that
 	   lie below the dst offset with what is already in the destination,
 	   so the full-word store in the loop leaves them unchanged.  */
 	ldq_u	t1, 0(a0)	# e0    :
 	beq	t2, $fa_small	# .. e1 :
 	mskqh	t0, a0, t0	# e0    :
 	mskql	t1, a0, t3	# e0    :
 	bis	t0, t3, t0	# e0    :
 	br	$fa_loop	# .. e1 :
 	/* The move affects exactly one destination word.  */
 $fa_small:
 	/* t0 = source bytes from the start offset up, t3 = destination
 	   bytes below the start offset, t4 = end offset within the word.  */
 	mskqh	t0, a0, t0	# e0    :
 	and	a2, 7, t4	# .. e1 :
 	mskql	t1, a0, t3	# e0    :
 	bne	t4, 1f		# .. e1 :
 	or	t0, t3, t0	# e0    :
 	unop			#       :
 	stq_u	t0, 0(a0)	# e0    :
 	ret			# .. e1 :
 	/* The copy also stops short of the word end: additionally trim the
 	   source at the end offset and keep the destination bytes above it. */
 1:	mskql	t0, a2, t0	# e0    :
 	mskqh	t1, a2, t1	# e0    :
 	or	t0, t3, t0	# e0    :
 	or	t0, t1, t0	# e1    :
 	stq_u	t0, 0(a0)	# e0    :
 	ret			# .. e1 :
 	.end copy_fwd_aligned
 	.ent memcpy
 	.globl memcpy
 	.align 3
 	/* memcpy: forward copy of a2 bytes from a1 to a0.
 	   In:   a0 = destination, a1 = source, a2 = byte count
 	   Out:  v0 = original destination pointer
 	   This entry point only classifies the request and then branches
 	   into one of the copy routines in this file; those return directly
 	   to memcpy's caller.  */
 memcpy:
 	.frame sp, 0, ra, 0
 #ifdef PROF
 	/* At procedure entry the procedure value (pv) holds our own address,
 	   so gp has to be derived from pv; ra still points into the caller. */
 	ldgp	gp, 0(pv)
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount
 	.prologue 1
 #else
 	.prologue 0
 #endif
 	mov	a0, v0
 	beq	a2, $zero_length
 	/* Are source and destination co-aligned?  That is, do the two
 	   addresses agree in their low three bits?  */
 	xor	a0, a1, t0
 	unop
 	and	t0, 7, t0
 	beq	t0, $fwd_aligned
 	br	$fwd_unaligned
 	.end memcpy
 	.ent copy_fwd_unaligned
 copy_fwd_unaligned:
 	.frame sp, 0, ra, 0
 	.prologue 0
 	/* Forward copy used by memcpy when the source and destination
 	   addresses differ in their low three bits.  memcpy branches to
 	   $fwd_unaligned further down, which prepares the registers listed
 	   below and continues at $fu_loop or $fu_small.  Each destination
 	   word is assembled from two neighbouring source words with an
 	   extql/extqh pair.  $zero_length, memcpy's exit for a zero byte
 	   count, is the final ret of this procedure.  */
 	/* Unaligned forward copy main loop.  On entry to this basic block:
 	   t0 == source low word, unshifted
 	   t2 == loop counter
 	   t7 == last source byte + 1
 	   a0 == destination pointer
 	   a1 == source pointer
 	   a2 mod 8 == byte count in final word */
 	.align 4
 $fu_loop:
 	beq	t2, $fu_tail	# e1    :
 	/* An odd count is evened out with one single-word copy so that the
 	   main loop can move two words per pass.  */
 	blbc	t2, 0f		# e1    :
 	ldq_u	t1, 8(a1)	# e1    : copy one unaligned word
 	extql	t0, a1, t3	# .. e0 :
 	addq	a1, 8, a1	# e0    :
 	addq	a0, 8, a0	# .. e1 :
 	extqh	t1, a1, t4	# e0    :
 	subq	t2, 1, t2	# .. e1 :
 	mov	t1, t0		# e0    :
 	or	t3, t4, t3	# .. e1 :
 	stq_u	t3, -8(a0)	# e0    :
 	beq	t2, $fu_tail	# .. e1 :
 0:	ldq_u	t1, 8(a1)	# e1    : copy two unaligned words
 	extql	t0, a1, t3	# .. e0 :
 	ldq_u	t0, 16(a1)	# e0    :
 	subq	t2, 2, t2	# .. e1 :
 	extqh	t1, a1, t4	# e0    :
 	addq	a0, 16, a0	# .. e1 :
 	extql	t1, a1, t5	# e0    :
 	or	t3, t4, t3	# .. e1 :
 	extqh	t0, a1, t6	# e0    :
 	addq	a1, 16, a1	# .. e1 :
 	stq_u	t3, -16(a0)	# e0    :
 	or	t5, t6, t5	# .. e1 :
 	stq_u	t5, -8(a0)	# e0    :
 	bne	t2, 0b		# .. e1 :
 	/* Take care of a partial words tail.  */
 $fu_tail:
 	/* The high half comes from the word holding the last source byte
 	   (t7 - 1), so nothing past the end of the source is read.  */
 	ldq_u	t4, -1(t7)	# e1    :
 	extql	t0, a1, t3	# .. e0 :
 	extqh	t4, a1, t4	# e0 (stall)
 	and	a2, 7, t5	# .. e1 :
 	or	t3, t4, t3	# e0    :
 	beq	t5, 1f		# .. e1 :
 	/* Partial final word: source bytes below the end offset, existing
 	   destination bytes from the end offset upward.  */
 	ldq_u	t1, 0(a0)	# e1    :
 	mskql	t3, a2, t3	# .. e0 :
 	mskqh	t1, a2, t1	# e0 (stall)
 	or	t1, t3, t3	# e1    :
 1:	stq_u	t3, 0(a0)	# e0    :
 	ret			# .. e1 :
 	/* The entry point to the unaligned forward copy.  */
 	.align 3
 $fwd_unaligned:
 	/* t3 = offset of dst within its quadword.  a2 is rebased to count
 	   from the start of that quadword and a1 is moved back by the same
 	   amount, so a0 and a1 can then advance in whole-word steps.  */
 	ldq_u	t0, 0(a1)	# e0    : load initial bits of src
 	addq	a1, a2, t7	# .. e1 : record last byte + 1 of src
 	and	a0, 7, t3	# e0    : find dst misalignment
 	addq	a2, t3, a2	# e1    : find number of words affected
 	subq	a2, 1, t2	# e0    :
 	cmple	a2, 8, t4	# .. e1 : are we dealing with a small block?
 	subq	a1, t3, a1	# e0    :
 	bne	t4, $fu_small	# .. e1 :
 	srl	t2, 3, t2	# e0    :
 	beq	t3, $fu_loop	# .. e1 :
 	/* Take care of an unaligned dst head.  */
 	ldq_u	t5, 0(a0)	# e0    :
 	ldq_u	t1, 8(a1)	# .. e1 :
 	extql	t0, a1, t3	# e0    :
 	addq	a0, 8, a0	# .. e1 :
 	extqh	t1, a1, t4	# e0    :
 	addq	a1, 8, a1	# .. e1 :
 	mskql	t5, a0, t5	# e0    :
 	or	t3, t4, t3	# .. e1 :
 	mskqh	t3, a0, t3	# e0    :
 	subq	t2, 1, t2	# .. e1 :
 	or	t3, t5, t3	# e0    :
 	mov	t1, t0		# .. e1 :
 	stq_u	t3, -8(a0)	# e0    :
 	br	$fu_loop	# .. e1 :
 	/* The move affects exactly one destination word.  */
 	.align 3
 $fu_small:
 	/* First collect in t5 the destination bytes that must survive:
 	   those below the start offset, plus (when the copy stops short of
 	   the word end) those from the end offset upward.  */
 	ldq_u	t2, 0(a0)	# e1    :
 	extql	t0, a1, t3	# .. e0 :
 	ldq_u	t1, -1(t7)	# e0    :
 	and	a2, 7, t8	# .. e1 :
 	mskqh	t2, a2, t6	# e0    :
 	mskql	t2, a0, t5	# e0    :
 	extqh	t1, a1, t4	# e0    :
 	cmovne	t8, t6, t8	# .. e1 :
 	or	t3, t4, t3	# e0    :
 	or	t5, t8, t5	# .. e1 :
 	/* Then trim the assembled source word to the same window.  The
 	   trimmed value has to land in t3, the register that is merged and
 	   stored below; otherwise source bytes above the end offset would be
 	   OR-ed into the preserved destination bytes.  */
 	mskqh	t3, a0, t3	# e0    :
 	and	a2, 7, t8	# .. e1 :
 	mskql	t3, a2, t6	# e0    :
 	cmovne	t8, t6, t3	# e1    :
 	or	t3, t5, t3	# e0    :
 	unop			#       :
 	stq_u	t3, 0(a0)	# e0    :
 $zero_length:
 	ret			# .. e1 :
 	.end copy_fwd_unaligned
--- a/sysdeps/alpha/memset.S
+++ b/sysdeps/alpha/memset.S
@ -85,7 +85,14 @@ $tail:	bne	t4, 1f		# is there a tail to do?
 	.end memset_loop
 ENTRY(memset)
 #ifdef PROF
 	ldgp	gp, 0(pv)
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount
 	.prologue 1
 #else
 	.prologue 0
 #endif
 	zapnot	a1, 1, a1	# e0    : zero extend input character
 	mov	a0, v0		# .. e1 : move return value in place
--- a/sysdeps/alpha/reml.S
+++ b/sysdeps/alpha/reml.S
@ -1,6 +1,6 @@
 #define IS_REM		1
 #define SIZE		4
-#define SIGNED		1
+#define UFUNC_NAME	__remlu
-#define FUNC_NAME	__reml
+#define SFUNC_NAME	__reml
 #include "divrem.h"
--- a/sysdeps/alpha/remlu.S
+++ b/sysdeps/alpha/remlu.S
@ -1,6 +0,0 @@
 #define IS_REM		1
 #define SIZE		4
 #define SIGNED		0
 #define FUNC_NAME	__remlu
 #include "divrem.h"
--- a/sysdeps/alpha/remq.S
+++ b/sysdeps/alpha/remq.S
@ -1,6 +1,6 @@
 #define IS_REM		1
 #define SIZE		8
-#define SIGNED		1
+#define UFUNC_NAME	__remqu
-#define FUNC_NAME	__remq
+#define SFUNC_NAME	__remq
 #include "divrem.h"
--- a/sysdeps/alpha/remqu.S
+++ b/sysdeps/alpha/remqu.S
@ -1,6 +0,0 @@
 #define IS_REM		1
 #define SIZE		8
 #define SIGNED		0
 #define FUNC_NAME	__remqu
 #include "divrem.h"
--- a/sysdeps/alpha/s_copysign.S
+++ b/sysdeps/alpha/s_copysign.S
@ -20,7 +20,15 @@ Cambridge, MA 02139, USA.  */
 #include <sysdep.h>
 ENTRY(__copysign)
 #ifdef PROF
 	ldgp	gp, 0(pv)
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount
 	.prologue 1
 #else
 	.prologue 0
 #endif
 	cpys	$f17,$f16,$f0
 	ret
--- a/sysdeps/alpha/s_fabs.S
+++ b/sysdeps/alpha/s_fabs.S
@ -20,7 +20,15 @@ Cambridge, MA 02139, USA.  */
 #include <sysdep.h>
 ENTRY(__fabs)
 #ifdef PROF
 	ldgp	gp, 0(pv)
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount
 	.prologue 1
 #else
 	.prologue 0
 #endif
 	cpys	$f31,$f16,$f0
 	ret
--- a/sysdeps/alpha/setjmp.S
+++ b/sysdeps/alpha/setjmp.S
@ -23,6 +23,10 @@ Cambridge, MA 02139, USA.  */
   extra arguments.  */
 ENTRY (__sigsetjmp)
 	ldgp	$29, 0($27)
 #ifdef PROF
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount
 #endif
 	.prologue 1
 	bis	$30, $30, $18		/* Pass SP as 3rd arg.  */
--- a/sysdeps/alpha/stpcpy.S
+++ b/sysdeps/alpha/stpcpy.S
@ -27,6 +27,10 @@ Cambridge, MA 02139, USA.  */
 ENTRY(__stpcpy)
 	ldgp	gp, 0(pv)
 #ifdef PROF
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount
 #endif
 	.prologue 1
 	jsr	t9, __stxcpy	# do the work of the copy
--- a/sysdeps/alpha/stpncpy.S
+++ b/sysdeps/alpha/stpncpy.S
@ -1,24 +1,23 @@
 /* Copyright (C) 1996 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Richard Henderson (rth@tamu.edu)
-This file is part of the GNU C Library.
+   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.
-The GNU C Library is free software; you can redistribute it and/or
+   The GNU C Library is distributed in the hope that it will be useful,
-modify it under the terms of the GNU Library General Public License as
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
-published by the Free Software Foundation; either version 2 of the
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-License, or (at your option) any later version.
+   Library General Public License for more details.
-The GNU C Library is distributed in the hope that it will be useful,
+   You should have received a copy of the GNU Library General Public
-but WITHOUT ANY WARRANTY; without even the implied warranty of
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-Library General Public License for more details.
+   Boston, MA 02111-1307, USA.  */
-You should have received a copy of the GNU Library General Public
+/* Copy no more than COUNT bytes of the null-terminated string from
 License along with the GNU C Library; see the file COPYING.LIB.  If
 not, write to the Free Software Foundation, Inc., 675 Mass Ave,
 Cambridge, MA 02139, USA.  */
 /* Copy no more than COUNT bytes of the null-terminated string from 
   SRC to DST.  If SRC does not cover all of COUNT, the balance is
   zeroed.  Return the address of the terminating null in DEST, if
   any, else DEST + COUNT.  */
@ -32,8 +31,12 @@ Cambridge, MA 02139, USA.  */
 ENTRY(__stpncpy)
 	ldgp	gp, 0(pv)
 #ifdef PROF
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount
 #endif
 	.prologue 1
-	
+
 	beq	a2, $zerocount
 	jsr	t9, __stxncpy	# do the work of the copy
--- a/sysdeps/alpha/strcat.S
+++ b/sysdeps/alpha/strcat.S
@ -1,22 +1,21 @@
 /* Copyright (C) 1996 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Richard Henderson (rth@tamu.edu)
-This file is part of the GNU C Library.
+   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.
-The GNU C Library is free software; you can redistribute it and/or
+   The GNU C Library is distributed in the hope that it will be useful,
-modify it under the terms of the GNU Library General Public License as
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
-published by the Free Software Foundation; either version 2 of the
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-License, or (at your option) any later version.
+   Library General Public License for more details.
-The GNU C Library is distributed in the hope that it will be useful,
+   You should have received a copy of the GNU Library General Public
-but WITHOUT ANY WARRANTY; without even the implied warranty of
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-Library General Public License for more details.
+   Boston, MA 02111-1307, USA.  */
 You should have received a copy of the GNU Library General Public
 License along with the GNU C Library; see the file COPYING.LIB.  If
 not, write to the Free Software Foundation, Inc., 675 Mass Ave,
 Cambridge, MA 02139, USA.  */
 /* Append a null-terminated string from SRC to DST.  */
@ -26,6 +25,10 @@ Cambridge, MA 02139, USA.  */
 ENTRY(strcat)
 	ldgp	gp, 0(pv)
 #ifdef PROF
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount
 #endif
 	.prologue 1
 	mov	a0, v0		# set up return value
@ -59,7 +62,7 @@ $found:	negq    t1, t2		# clear all but least set bit
 	addq	a0, t2, a0
 	/* Now do the append.  */
-	
+
 	jsr	t9, __stxcpy
 	ret
--- a/sysdeps/alpha/strchr.S
+++ b/sysdeps/alpha/strchr.S
@ -1,25 +1,24 @@
 /* Copyright (C) 1996 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Richard Henderson (rth@tamu.edu)
-This file is part of the GNU C Library.
+   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.
-The GNU C Library is free software; you can redistribute it and/or
+   The GNU C Library is distributed in the hope that it will be useful,
-modify it under the terms of the GNU Library General Public License as
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
-published by the Free Software Foundation; either version 2 of the
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-License, or (at your option) any later version.
+   Library General Public License for more details.
-The GNU C Library is distributed in the hope that it will be useful,
+   You should have received a copy of the GNU Library General Public
-but WITHOUT ANY WARRANTY; without even the implied warranty of
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-Library General Public License for more details.
+   Boston, MA 02111-1307, USA.  */
 You should have received a copy of the GNU Library General Public
 License along with the GNU C Library; see the file COPYING.LIB.  If
 not, write to the Free Software Foundation, Inc., 675 Mass Ave,
 Cambridge, MA 02139, USA.  */
 /* Return the address of a given character within a null-terminated
-   string, or null if it is not found. 
+   string, or null if it is not found.
   This is generally scheduled for the EV5 (got to look out for my own
   interests :-), but with EV4 needs in mind.  There *should* be no more
@ -32,7 +31,14 @@ Cambridge, MA 02139, USA.  */
 	.set noat
 ENTRY(strchr)
 #ifdef PROF
 	ldgp	gp, 0(pv)
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount
 	.prologue 1
 #else
 	.prologue 0
 #endif
 	zapnot	a1, 1, a1	# e0    : zero extend the search character
 	ldq_u   t0, 0(a0)	# .. e1 : load first quadword
--- a/sysdeps/alpha/strcmp.S
+++ b/sysdeps/alpha/strcmp.S
@ -0,0 +1,195 @@
 /* Copyright (C) 1996 Free Software Foundation, Inc.
   Contributed by Richard Henderson (rth@tamu.edu)
   This file is part of the GNU C Library.
   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.
   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.
   You should have received a copy of the GNU Library General Public
   License along with the GNU C Library; see the file COPYING.LIB.  If
   not, write to the Free Software Foundation, Inc., 675 Mass Ave,
   Cambridge, MA 02139, USA.  */
 /* Bytewise compare two null-terminated strings.  */
 #include <sysdep.h>
 	.set noat
 	.set noreorder
 	.text
 /* int strcmp (const char *s1, const char *s2)

    In:   a0 = s1, a1 = s2.
    Out:  v0 = 0 if the strings compare equal; otherwise 1 if the first
	  differing byte of s1 is greater (cmpbge, i.e. unsigned byte
	  compare) than the one of s2, and -1 if it is smaller.
    Uses: t0-t8 as scratch (plus AT when built with PROF).

    The strings are compared one quadword at a time.  When s1 and s2
    share the same alignment within a quadword the $aligned path is
    used; otherwise s2 words are reassembled with extql/extqh so that
    they line up with the s1 words.  */
 ENTRY(strcmp)
 #ifdef PROF
 	ldgp	gp, 0(pv)
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount	# AT is the link register; hinted as a call
 	.prologue 1
 #else
 	.prologue 0
 #endif
 	ldq_u	t0, 0(a0)	# e0    : give cache time to catch up
 	xor	a0, a1, t2	# .. e1 : are s1 and s2 co-aligned?
 	ldq_u	t1, 0(a1)	# e0    :
 	and	t2, 7, t2	# .. e1 :
 	lda	t3, -1		# e0    :
 	bne	t2, $unaligned	# .. e1 :
 	/* On entry to this basic block:
 	   t0 == the first word of s1.
 	   t1 == the first word of s2.
 	   t3 == -1.  */
 $aligned:
 	mskqh	t3, a0, t3	# e0    : t3 = mask of bytes at/after string start
 	nop			# .. e1 :
 	ornot	t1, t3, t1	# e0    : force leading garbage to 0xff (never null,
 	ornot	t0, t3, t0	# .. e1 : and identical in both words)
 	cmpbge	zero, t1, t7	# e0    : bits set iff null found
 	bne	t7, $eos	# e1 (zdb)
 	/* Aligned compare main loop.
 	   On entry to this basic block:
 	   t0 == an s1 word.
 	   t1 == an s2 word not containing a null.  */
 $a_loop:
 	xor	t0, t1, t2	# e0	:
 	bne	t2, $wordcmp	# .. e1 (zdb)
 	ldq_u	t1, 8(a1)	# e0    :
 	ldq_u	t0, 8(a0)	# .. e1 :
 	addq	a1, 8, a1	# e0    :
 	addq	a0, 8, a0	# .. e1 :
 	cmpbge	zero, t1, t7	# e0    :
 	beq	t7, $a_loop	# .. e1 (zdb)
 	br	$eos		# e1    :
 	/* The two strings are not co-aligned.  Align s1 and cope.  */
 $unaligned:
 	and	a0, 7, t4	# e0    : find s1 misalignment
 	and	a1, 7, t5	# .. e1 : find s2 misalignment
 	subq	a1, t4, a1	# e0    :
 	/* If s2 misalignment is larger than s1 misalignment, we need
 	   extra startup checks to avoid SEGV.  */
 	cmplt	t4, t5, t8	# .. e1 :
 	beq	t8, $u_head	# e1    :
 	mskqh	t3, t5, t3	# e0    :
 	ornot	t1, t3, t3	# e0    :
 	cmpbge	zero, t3, t7	# e1    : is there a zero?
 	beq	t7, $u_head	# e1    :
 	/* We've found a zero in the first partial word of s2.  Align
 	   our current s1 and s2 words and compare what we've got.  */
 	extql	t1, t5, t1	# e0    :
 	extql	t0, a0, t0	# e0    :
 	cmpbge	zero, t1, t7	# .. e1 : find that zero again
 	br	$eos		# e1    : and finish up
 	.align 3
 $u_head:
 	/* We know just enough now to be able to assemble the first
 	   full word of s2.  We can still find a zero at the end of it.
 	   On entry to this basic block:
 	   t0 == first word of s1
 	   t1 == first partial word of s2.  */
 	ldq_u	t2, 8(a1)	# e0    : load second partial s2 word
 	lda	t3, -1		# .. e1 : create leading garbage mask
 	extql	t1, a1, t1	# e0    : create first s2 word
 	mskqh	t3, a0, t3	# e0    :
 	extqh	t2, a1, t4	# e0    :
 	ornot	t0, t3, t0	# .. e1 : kill s1 garbage
 	or	t1, t4, t1	# e0    : s2 word now complete
 	cmpbge	zero, t0, t7	# .. e1 : find zero in first s1 word
 	ornot	t1, t3, t1	# e0    : kill s2 garbage
 	lda	t3, -1		# .. e1 :
 	mskql	t3, a1, t3	# e0    : mask for s2[1] bits we have seen
 	bne	t7, $eos	# .. e1 :
 	xor	t0, t1, t4	# e0    : compare aligned words
 	bne	t4, $wordcmp	# .. e1 (zdb)
 	or	t2, t3, t3	# e0    :
 	cmpbge	zero, t3, t7	# e1    :
 	bne	t7, $u_final	# e1    :
 	/* Unaligned compare main loop.  In order to avoid reading too much,
 	   the loop is structured to detect zeros in aligned words from s2.
 	   This has, unfortunately, effectively pulled half of a loop
 	   iteration out into the head and half into the tail, but it does
 	   prevent nastiness from accumulating in the very thing we want
 	   to run as fast as possible.
 	   On entry to this basic block:
 	   t2 == the unshifted low-bits from the next s2 word.  */
 	.align 3
 $u_loop:
 	extql	t2, a1, t3	# e0    :
 	ldq_u	t2, 16(a1)	# .. e1 : load next s2 high bits
 	ldq_u	t0, 8(a0)	# e0    : load next s1 word
 	addq	a1, 8, a1	# .. e1 :
 	addq	a0, 8, a0	# e0    :
 	nop			# .. e1 :
 	extqh	t2, a1, t1	# e0    :
 	cmpbge	zero, t0, t7	# .. e1 : find zero in current s1 word
 	or	t1, t3, t1	# e0    :
 	bne	t7, $eos	# .. e1 :
 	xor	t0, t1, t4	# e0    : compare the words
 	bne	t4, $wordcmp	# .. e1 (zdb)
 	cmpbge	zero, t2, t4	# e0    : find zero in next low bits
 	beq	t4, $u_loop	# .. e1 (zdb)
 	/* We've found a zero in the low bits of the last s2 word.  Get
 	   the next s1 word and align them.  */
 $u_final:
 	ldq_u	t0, 8(a0)	# e1    :
 	extql	t2, a1, t1	# .. e0 :
 	cmpbge	zero, t1, t7	# e0    :
 	/* We've found a zero somewhere in a word we just read.
 	   On entry to this basic block:
 	   t0 == s1 word
 	   t1 == s2 word
 	   t7 == cmpbge mask containing the zero.  */
 	.align 3
 $eos:
 	negq	t7, t6		# e0    : create bytemask of valid data
 	and	t6, t7, t8	# e1    : t8 = lowest set bit of t7 (first null)
 	subq	t8, 1, t6	# e0    : t6 = bits for the bytes before it
 	or	t6, t8, t7	# e1    : t7 = bytes up to and including the null
 	zapnot	t0, t7, t0	# e0    : kill the garbage
 	zapnot	t1, t7, t1	# .. e1 :
 	xor	t0, t1, v0	# e0    : and compare
 	beq	v0, $done	# .. e1 : equal => return the zero in v0
 	/* Here we have two differing co-aligned words in t0 & t1.
           Bytewise compare them and return (t0 > t1 ? 1 : -1).  */
 $wordcmp:
 	cmpbge	t0, t1, t2	# e0    : comparison yields bit mask of ge
 	cmpbge	t1, t0, t3	# .. e1 :
 	xor	t2, t3, t0	# e0    : bits set iff t0/t1 bytes differ
 	negq	t0, t1		# e1    : clear all but least bit
 	and	t0, t1, t0	# e0    :
 	lda	v0, -1		# .. e1 :
 	and	t0, t2, t1	# e0    : was bit set in t0 > t1?
 	cmovne	t1, 1, v0	# .. e1 (zdb)
 $done:
 	ret			# e1    :
 	END(strcmp)
--- a/sysdeps/alpha/strcpy.S
+++ b/sysdeps/alpha/strcpy.S
@ -27,6 +27,10 @@ Cambridge, MA 02139, USA.  */
 ENTRY(strcpy)
 	ldgp	gp, 0(pv)
 #ifdef PROF
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount
 #endif
 	.prologue 1
 	mov	a0, v0		# set up return value
--- a/sysdeps/alpha/strlen.S
+++ b/sysdeps/alpha/strlen.S
@ -34,6 +34,15 @@ Cambridge, MA 02139, USA.  */
 	.set noat
 ENTRY(strlen)
 #ifdef PROF
 	ldgp	gp, 0(pv)
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount
 	.prologue 1
 #else
 	.prologue 0
 #endif
 	ldq_u   t0, 0(a0)	# load first quadword (a0 may be misaligned)
 	lda     t1, -1(zero)
 	insqh   t1, a0, t1
--- a/sysdeps/alpha/strncat.S
+++ b/sysdeps/alpha/strncat.S
@ -27,6 +27,10 @@ Cambridge, MA 02139, USA.  */
 ENTRY(strncat)
 	ldgp	gp, 0(pv)
 #ifdef PROF
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount
 #endif
 	.prologue 1
 	mov	a0, v0		# set up return value
--- a/sysdeps/alpha/strncmp.S
+++ b/sysdeps/alpha/strncmp.S
@ -0,0 +1,224 @@
 /* Copyright (C) 1996 Free Software Foundation, Inc.
   Contributed by Richard Henderson (rth@tamu.edu)
   This file is part of the GNU C Library.
   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.
   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.
   You should have received a copy of the GNU Library General Public
   License along with the GNU C Library; see the file COPYING.LIB.  If
   not, write to the Free Software Foundation, Inc., 675 Mass Ave,
   Cambridge, MA 02139, USA.  */
 /* Bytewise compare two null-terminated strings of length no longer than N.  */
 #include <sysdep.h>
 	.set noat
 	.set noreorder
 	.text
 /* int strncmp (const char *s1, const char *s2, size_t n)

    In:   a0 = s1, a1 = s2, a2 = n.
    Out:  v0 = 0 if n is zero or the strings compare equal within the
	  first n bytes; otherwise 1 if the first differing byte of s1
	  is greater (cmpbge, i.e. unsigned byte compare) than the one
	  of s2, and -1 if it is smaller.
    Uses: t0-t8 and t10 as scratch (plus AT when built with PROF).

    The count is biased by the misalignment of s1 and split into
      a2  == index of the quadword in which the count ends, counted
	     down as words are consumed, and
      t10 == number of bytes of that last quadword that are inside
	     the count.
    Both the aligned and the unaligned path keep the invariant that
    a2 == 0 exactly when the word just loaded is that last word.  */
 ENTRY(strncmp)
 #ifdef PROF
 	ldgp	gp, 0(pv)
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount
 	.prologue 1
 #else
 	.prologue 0
 #endif
 	xor	a0, a1, t2	# e0    : are s1 and s2 co-aligned?
 	beq	a2, $zerolength	# .. e1 :
 	ldq_u	t0, 0(a0)	# e0    : give cache time to catch up
 	ldq_u	t1, 0(a1)	# .. e1 :
 	and	t2, 7, t2	# e0    :
 	and	a0, 7, t4	# .. e1 : find s1 misalignment
 	lda	t3, -1		# e0    :
 	addq	a2, t4, a2	# .. e1 : bias count by s1 misalignment
 	and	a2, 7, t10	# e1    : ofs of last byte in last word
 	srl	a2, 3, a2	# .. e0 : remaining full words in count
 	and	a1, 7, t5	# e0    : find s2 misalignment
 	bne	t2, $unaligned	# .. e1 :
 	/* On entry to this basic block:
 	   t0 == the first word of s1.
 	   t1 == the first word of s2.
 	   t3 == -1.  */
 $aligned:
 	mskqh	t3, a1, t3	# e0    : mask off leading garbage
 	nop			# .. e1 :
 	ornot	t1, t3, t1	# e0    :
 	ornot	t0, t3, t0	# .. e1 :
 	cmpbge	zero, t1, t7	# e0    : bits set iff null found
 	beq	a2, $eoc	# .. e1 : check end of count
 	subq	a2, 1, a2	# e0    : first word consumed (as in $u_head)
 	bne	t7, $eos	# e1    :
 	/* Aligned compare main loop.
 	   On entry to this basic block:
 	   t0 == an s1 word.
 	   t1 == an s2 word not containing a null.
 	   a2 == number of words between this one and the word in
 		 which the count ends, minus one.  */
 $a_loop:
 	xor	t0, t1, t2	# e0	:
 	bne	t2, $wordcmp	# .. e1 (zdb)
 	ldq_u	t1, 8(a1)	# e0    : load the NEXT word; reloading 0(a1)
 	ldq_u	t0, 8(a0)	# .. e1 : would bring back unmasked garbage
 	addq	a1, 8, a1	# e0    :
 	addq	a0, 8, a0	# .. e1 :
 	cmpbge	zero, t1, t7	# e0    :
 	beq	a2, $eoc	# .. e1 :
 	subq	a2, 1, a2	# e0    :
 	beq	t7, $a_loop	# .. e1 :
 	br	$eos		# e1    :
 	/* The two strings are not co-aligned.  Align s1 and cope.  */
 $unaligned:
 	subq	a1, t4, a1	# e0	 :
 	unop			#        :
 	/* If s2 misalignment is larger than s1 misalignment, we need
 	   extra startup checks to avoid SEGV.  */
 	cmplt	t4, t5, t8	# .. e1 :
 	beq	t8, $u_head	# e1    :
 	mskqh	t3, t5, t3	# e0    :
 	ornot	t1, t3, t3	# e0    :
 	cmpbge	zero, t3, t7	# e1    : is there a zero?
 	beq	t7, $u_head	# e1    :
 	/* We've found a zero in the first partial word of s2.  Align
 	   our current s1 and s2 words and compare what we've got.  */
 	extql	t1, t5, t1	# e0    :
 	lda	t3, -1		# .. e1 :
 	insql	t1, a0, t1	# e0    :
 	mskqh	t3, a0, t3	# e0    :
 	ornot	t1, t3, t1	# e0    :
 	ornot	t0, t3, t0	# .. e1 :
 	cmpbge	zero, t1, t7	# e0    : find that zero again
 	beq	a2, $eoc	# .. e1 : and finish up
 	br	$eos		# e1    :
 	.align 3
 $u_head:
 	/* We know just enough now to be able to assemble the first
 	   full word of s2.  We can still find a zero at the end of it.
 	   On entry to this basic block:
 	   t0 == first word of s1
 	   t1 == first partial word of s2.  */
 	ldq_u	t2, 8(a1)	# e0    : load second partial s2 word
 	lda	t3, -1		# .. e1 : create leading garbage mask
 	extql	t1, a1, t1	# e0    : create first s2 word
 	mskqh	t3, a0, t3	# e0    :
 	extqh	t2, a1, t4	# e0    :
 	ornot	t0, t3, t0	# .. e1 : kill s1 garbage
 	or	t1, t4, t1	# e0    : s2 word now complete
 	ornot	t1, t3, t1	# e1    : kill s2 garbage
 	cmpbge	zero, t0, t7	# e0    : find zero in first s1 word
 	beq	a2, $eoc	# .. e1 :
 	lda	t3, -1		# e0    :
 	bne	t7, $eos	# .. e1 :
 	subq	a2, 1, a2	# e0    :
 	xor	t0, t1, t4	# .. e1 : compare aligned words
 	mskql	t3, a1, t3	# e0    : mask out s2[1] bits we have seen
 	bne	t4, $wordcmp	# .. e1 :
 	or	t2, t3, t3	# e0    :
 	cmpbge	zero, t3, t7	# e1    : find zero in high bits of s2[1]
 	bne	t7, $u_final	# e1    :
 	/* Unaligned compare main loop.  In order to avoid reading too much,
 	   the loop is structured to detect zeros in aligned words from s2.
 	   This has, unfortunately, effectively pulled half of a loop
 	   iteration out into the head and half into the tail, but it does
 	   prevent nastiness from accumulating in the very thing we want
 	   to run as fast as possible.
 	   On entry to this basic block:
 	   t2 == the unshifted low-bits from the next s2 word.  */
 	.align 3
 $u_loop:
 	extql	t2, a1, t3	# e0    :
 	ldq_u	t2, 16(a1)	# .. e1 : load next s2 high bits
 	ldq_u	t0, 8(a0)	# e0    : load next s1 word
 	addq	a1, 8, a1	# .. e1 :
 	addq	a0, 8, a0	# e0    :
 	nop			# .. e1 :
 	extqh	t2, a1, t1	# e0    :
 	cmpbge	zero, t0, t7	# .. e1 : find zero in current s1 word
 	or	t1, t3, t1	# e0    :
 	beq	a2, $eoc	# .. e1 : check for end of count
 	subq	a2, 1, a2	# e0    :
 	bne	t7, $eos	# .. e1 :
 	xor	t0, t1, t4	# e0    : compare the words
 	bne	t4, $wordcmp	# .. e1 (zdb)
 	cmpbge	zero, t2, t4	# e0    : find zero in next low bits
 	beq	t4, $u_loop	# .. e1 (zdb)
 	/* We've found a zero in the low bits of the last s2 word.  Get
 	   the next s1 word and align them.  */
 $u_final:
 	ldq_u	t0, 8(a0)	# e1    :
 	extql	t2, a1, t1	# .. e0 :
 	cmpbge	zero, t1, t7	# e0    :
 	bne	a2, $eos	# .. e1 :
 	/* We've hit end of count.  Zero everything after the count
 	   and compare what's left.  */
 	.align 3
 $eoc:
 	mskql	t0, t10, t0	# keep only the t10 low bytes of each word
 	mskql	t1, t10, t1
 	/* We've found a zero somewhere in a word we just read.
 	   On entry to this basic block:
 	   t0 == s1 word
 	   t1 == s2 word
 	   t7 == cmpbge mask containing the zero.  */
 $eos:
 	negq	t7, t6		# e0    : create bytemask of valid data
 	and	t6, t7, t8	# e1    : t8 = lowest set bit of t7 (first null)
 	subq	t8, 1, t6	# e0    : t6 = bits for the bytes before it
 	or	t6, t8, t7	# e1    : t7 = bytes up to and including the null
 	zapnot	t0, t7, t0	# e0    : kill the garbage
 	zapnot	t1, t7, t1	# .. e1 :
 	xor	t0, t1, v0	# e0    : and compare
 	beq	v0, $done	# .. e1 : equal => return the zero in v0
 	/* Here we have two differing co-aligned words in t0 & t1.
           Bytewise compare them and return (t0 > t1 ? 1 : -1).  */
 $wordcmp:
 	cmpbge	t0, t1, t2	# e0    : comparison yields bit mask of ge
 	cmpbge	t1, t0, t3	# .. e1 :
 	xor	t2, t3, t0	# e0    : bits set iff t0/t1 bytes differ
 	negq	t0, t1		# e1    : clear all but least bit
 	and	t0, t1, t0	# e0    :
 	lda	v0, -1		# .. e1 :
 	and	t0, t2, t1	# e0    : was bit set in t0 > t1?
 	cmovne	t1, 1, v0	# .. e1 (zdb)
 $done:
 	ret			# e1    :
 	/* n == 0: nothing to compare, so the strings are equal.  */
 $zerolength:
 	clr	v0
 	ret
 	END(strncmp)
--- a/sysdeps/alpha/strncpy.S
+++ b/sysdeps/alpha/strncpy.S
@ -31,6 +31,10 @@ Cambridge, MA 02139, USA.  */
 ENTRY(strncpy)
 	ldgp	gp, 0(pv)
 #ifdef PROF
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount
 #endif
 	.prologue 1
 	mov	a0, v0		# set return value now
--- a/sysdeps/alpha/strrchr.S
+++ b/sysdeps/alpha/strrchr.S
@ -31,7 +31,14 @@ Cambridge, MA 02139, USA.  */
 	.set noat
 ENTRY(strrchr)
 #ifdef PROF
 	ldgp	gp, 0(pv)
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount
 	.prologue 1
 #else
 	.prologue 0
 #endif
 	zapnot	a1, 1, a1	# e0    : zero extend our test character
 	mov	zero, t6	# .. e1 : t6 is last match aligned addr
--- a/sysdeps/alpha/udiv_qrnnd.S
+++ b/sysdeps/alpha/udiv_qrnnd.S
@ -27,8 +27,15 @@
 	.text
 LEAF(__udiv_qrnnd, 0)
-
+#ifdef PROF
 	ldgp	gp, 0(pv)
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount
 	.prologue 1
 #else
        .prologue 0
 #endif
 #define cnt	$2
 #define tmp	$3
 #define rem_ptr	$16
@ -38,9 +45,9 @@ LEAF(__udiv_qrnnd, 0)
 #define qb	$20
 	ldiq	cnt,16
-	blt	d,.Largedivisor
+	blt	d,$largedivisor
-.Loop1:	cmplt	n0,0,tmp
+$loop1:	cmplt	n0,0,tmp
 	addq	n1,n1,n1
 	bis	n1,tmp,n1
 	addq	n0,n0,n0
@ -73,12 +80,12 @@ LEAF(__udiv_qrnnd, 0)
 	cmovne	qb,tmp,n1
 	bis	n0,qb,n0
 	subq	cnt,1,cnt
-	bgt	cnt,.Loop1
+	bgt	cnt,$loop1
 	stq	n1,0(rem_ptr)
 	bis	$31,n0,$0
 	ret	$31,($26),1
-.Largedivisor:
+$largedivisor:
 	and	n0,1,$4
 	srl	n0,1,n0
@ -90,7 +97,7 @@ LEAF(__udiv_qrnnd, 0)
 	srl	d,1,$5
 	addq	$5,$6,$5
-.Loop2:	cmplt	n0,0,tmp
+$loop2:	cmplt	n0,0,tmp
 	addq	n1,n1,n1
 	bis	n1,tmp,n1
 	addq	n0,n0,n0
@ -123,27 +130,30 @@ LEAF(__udiv_qrnnd, 0)
 	cmovne	qb,tmp,n1
 	bis	n0,qb,n0
 	subq	cnt,1,cnt
-	bgt	cnt,.Loop2
+	bgt	cnt,$loop2
 	addq	n1,n1,n1
 	addq	$4,n1,n1
-	bne	$6,.LOdd
+	bne	$6,$Odd
 	stq	n1,0(rem_ptr)
 	bis	$31,n0,$0
 	ret	$31,($26),1
-.LOdd:
+$Odd:
 	/* q' in n0. r' in n1 */
 	addq	n1,n0,n1
 	cmpult	n1,n0,tmp	# tmp := carry from addq
-	beq	tmp,.LLp6
+	subq	n1,d,AT
-	addq	n0,1,n0
+	addq	n0,tmp,n0
-	subq	n1,d,n1
+	cmovne	tmp,AT,n1
-.LLp6:	cmpult	n1,d,tmp
+
-	bne	tmp,.LLp7
+	cmpult	n1,d,tmp
-	addq	n0,1,n0
+	addq	n0,1,AT
-	subq	n1,d,n1
+	cmoveq	tmp,AT,n0
-.LLp7:
+	subq	n1,d,AT
 	cmoveq	tmp,AT,n1
 	stq	n1,0(rem_ptr)
 	bis	$31,n0,$0
 	ret	$31,($26),1
--- a/sysdeps/alpha/w_sqrt.S
+++ b/sysdeps/alpha/w_sqrt.S
@ -0,0 +1,161 @@
 /* Copyright (C) 1996 Free Software Foundation, Inc.
   Contributed by David Mosberger (davidm@cs.arizona.edu).
   Based on public-domain C source by Linus Torvalds.
   This file is part of the GNU C Library.
   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.
   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.
   You should have received a copy of the GNU Library General Public
   License along with the GNU C Library; see the file COPYING.LIB.  If
   not, write to the Free Software Foundation, Inc., 675 Mass Ave,
   Cambridge, MA 02139, USA.  */
 /* This version is much faster than generic sqrt implementation, but
   it doesn't handle exceptional values or the inexact flag.  Don't use
   this if _IEEE_FP or _IEEE_FP_INEXACT is in effect. */
 #ifndef _IEEE_FP
 #include <errnos.h>
 #include <sysdep.h>
 	.set noreorder
 #ifdef __ELF__
 	.section .rodata
 #else
 	.rdata
 #endif
 	.align 5        # align to cache line
 	/* Do all memory accesses relative to sqrtdata.  The #defines
 	   below are byte offsets from the sqrtdata label.  */
 sqrtdata:
 #define DN                     0x00
 #define UP                     0x08
 #define HALF                   0x10
 #define ALMOST_THREE_HALF      0x18
 #define T2                     0x20
 	.quad 0x3fefffffffffffff        /* DN = prev(1.0), largest double < 1.0 */
 	.quad 0x3ff0000000000001        /* UP = next(1.0), smallest double > 1.0 */
 	.quad 0x3fe0000000000000        /* HALF = 0.5 */
 	.quad 0x3ff7ffffffc00000        /* ALMOST_THREE_HALF = 1.5-2^-30 */
 /* table T2: 64 longword corrections for the initial guess.  __sqrt
    indexes it with a byte offset masked by 0xfc (entries 0..63) and
    subtracts the loaded entry from the guess.  */
 .long   0x1500, 0x2ef8,   0x4d67,  0x6b02,  0x87be,  0xa395,  0xbe7a,  0xd866
 .long   0xf14a, 0x1091b, 0x11fcd, 0x13552, 0x14999, 0x15c98, 0x16e34, 0x17e5f
 .long  0x18d03, 0x19a01, 0x1a545, 0x1ae8a, 0x1b5c4, 0x1bb01, 0x1bfde, 0x1c28d
 .long  0x1c2de, 0x1c0db, 0x1ba73, 0x1b11c, 0x1a4b5, 0x1953d, 0x18266, 0x16be0
 .long  0x1683e, 0x179d8, 0x18a4d, 0x19992, 0x1a789, 0x1b445, 0x1bf61, 0x1c989
 .long  0x1d16d, 0x1d77b, 0x1dddf, 0x1e2ad, 0x1e5bf, 0x1e6e8, 0x1e654, 0x1e3cd
 .long  0x1df2a, 0x1d635, 0x1cb16, 0x1be2c, 0x1ae4e, 0x19bde, 0x1868e, 0x16e2e
 .long  0x1527f, 0x1334a, 0x11051,  0xe951,  0xbe01,  0x8e0d,  0x5924,  0x1edd
 /*
 * double __sqrt (double x)
 *
 * In:   $f16 = x.
 * Out:  $f0  = approximation of sqrt(x) refined as described below.
 *       For x < 0, errno is set to EDOM and the all-ones bit pattern
 *       is returned.
 *
 * Stack variables (FSIZE bytes of frame; 0(sp) holds the saved ra):
 *   K  scratch slot used to move x between FP and integer registers,
 *      and to build the error return value.
 *   Y  scratch slot used to move the integer initial guess into an
 *      FP register.
 */
 #define K      16(sp)
 #define Y      24(sp)
 #define FSIZE  32
 	.text
 LEAF(__sqrt, FSIZE)
 	lda	sp, -FSIZE(sp)
 	ldgp	gp, .-__sqrt(pv)
 	stq	ra, 0(sp)
 #ifdef PROF
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount
 #endif
 	.prologue 1
 	stt	$f16, K
 	lda	t3, sqrtdata			# load base address into t3
 	fblt	$f16, $negative
 	/* Compute initial guess y ~= 1/sqrt(x) with integer arithmetic on
 	   the high bits of x, corrected by a T2 table entry.  */
 	.align 3
 	ldah	t1, 0x5fe8			# e0    :
 	ldq	t2, K				# .. e1 :
 	ldt	$f12, HALF(t3)			# e0    :
 	ldt	$f18, ALMOST_THREE_HALF(t3)	# .. e1 :
 	srl	t2, 33, t0			# e0    :
 	mult	$f16, $f12, $f11		# .. fm : $f11 = x * 0.5
 	subl	t1, t0, t1			# e0    :
 	addt	$f12, $f12, $f17		# .. fa : $f17 = 1.0
 	srl	t1, 12, t0			# e0    :
 	and	t0, 0xfc, t0			# .. e1 : byte offset of a T2 entry
 	addq	t0, t3, t0			# e0    :
 	ldl	t0, T2(t0)			# .. e1 :
 	addt	$f12, $f17, $f15		# fa    : $f15 = 1.5
 	subl	t1, t0, t1			# .. e1 :
 	sll	t1, 32, t1			# e0    :
 	ldt	$f14, DN(t3)			# .. e1 :
 	stq	t1, Y				# e0    :
 	ldt	$f13, Y				# e1    :
 	addq	sp, FSIZE, sp			# e0    : last stack access done; pop frame
 	/* Two refinement steps on y, then one on z = x*y, followed by a
 	   final adjustment that picks between zp*DN, zp and zp*UP.  */
 	mult	$f11, $f13, $f10	# fm    : $f10 = (x * 0.5) * y
 	mult	$f10, $f13, $f10	# fm    : $f10 = ((x * 0.5) * y) * y
 	subt	$f15, $f10, $f1		# fa    : $f1 = (1.5 - 0.5*x*y*y)
 	mult	$f13, $f1, $f13         # fm    : yp = y*(1.5 - 0.5*x*y*y)
 	mult	$f11, $f13, $f11	# fm    : $f11 = x * 0.5 * yp
 	mult	$f11, $f13, $f11	# fm    : $f11 = (x * 0.5 * yp) * yp
 	subt	$f18, $f11, $f1		# fa    : $f1= (1.5-2^-30) - 0.5*x*yp*yp
 	mult	$f13, $f1, $f13		# fm    : ypp = $f13 = yp*$f1
 	subt	$f15, $f12, $f1		# fa    : $f1 = (1.5 - 0.5)
 	ldt	$f15, UP(t3)		# .. e1 :
 	mult	$f16, $f13, $f10	# fm    : z = $f10 = x * ypp
 	mult	$f10, $f13, $f11	# fm    : $f11 = z*ypp
 	mult	$f10, $f12, $f12	# fm    : $f12 = z*0.5
 	subt	$f1, $f11, $f1		# .. fa : $f1 = 1 - z*ypp
 	mult	$f12, $f1, $f12		# fm    : $f12 = z*0.5*(1 - z*ypp)
 	addt	$f10, $f12, $f0		# fa    : zp=res=$f0= z + z*0.5*(1 - z*ypp)
 	mult/c	$f0, $f14, $f12		# fm    : zmi = zp * DN
 	mult/c	$f0, $f15, $f11		# fm    : zpl = zp * UP
 	mult/c	$f0, $f12, $f1		# fm    : $f1 = zp * zmi
 	mult/c	$f0, $f11, $f15		# fm    : $f15 = zp * zpl
 	subt    $f1, $f16, $f13		# fa    : y1 = zp*zmi - x
 	subt    $f15, $f16, $f15	# fa    : y2 = zp*zpl - x
 	fcmovge	$f13, $f12, $f0		# res = (y1 >= 0) ? zmi : res
 	fcmovlt	$f15, $f11, $f0		# res = (y2 <  0) ? zpl : res
 	ret
 	/* x < 0: domain error.  The frame is still allocated here.  */
 $negative:
 	lda	t1, -1
 	stq	t1, K			# K = all-ones bit pattern (return value)
 	lda	t1, EDOM
 	stl	t1, errno
 #ifdef _LIBC_REENTRANT
 	jsr	ra, __errno_location
 	lda	t1, EDOM		# reload after the call: store EDOM, not -1,
 	ldq	ra, 0(sp)		# into the location returned in v0
 	stl	t1, 0(v0)
 #endif
 	ldt	$f0, K			# res = bit pattern 0xffffffffffffffff
 	addq	sp, FSIZE, sp
 	ret
 	END(__sqrt)
 weak_alias(__sqrt, sqrt)
 #endif /* !_IEEE_FP */
		`@ -0,0 +1 @@`
							`/* lldiv is the same as ldiv on the Alpha. */`