mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-16 01:50:11 +00:00
Sat Oct 14 02:52:36 1995 Ulrich Drepper <drepper@ipd.info.uni-karlsruhe.de>
* malloc/malloc.c (_malloc_internal): Performance fix. Move if statement out of loop. * stdio/_itoa.c, stdio/_itoa.h: Complete rewrite. Much faster implementation using GMP functions. Contributed by Torbjorn Granlund and Ulrich Drepper. * stdio/test_rdwr.c: Include <errno.h>. * sysdeps/i386/i586/Implies: New file. New highly optimized string functions for i[345]86. * sysdeps/i386/memchr.S, sysdeps/i386/memcmp.S: New files. * sysdeps/i386/stpcpy.S, sysdeps/i386/stpncpy.S: New files. * sysdeps/i386/strchr.S, sysdeps/i386/strcspn.S: New files. * sysdeps/i386/strpbrk.S, sysdeps/i386/strrchr.S: New files. * sysdeps/i386/strspn.S, sysdeps/i386/i486/strcat.S: New files. * sysdeps/i386/i486/strlen.S, sysdeps/i386/i586/strchr.S: New files. * sysdeps/i386/i586/strlen.S: New file. * sysdeps/i386/memchr.c: Removed. There is now an assembler version. * sysdeps/i386/i586/memcopy.h (WORD_COPY_BWD): Parameters did not correspond to used values. * sysdeps/unix/sysv/linux/nfs/nfs.h: New file. Simply a wrapper around a kernel header file. * sysdeps/unix/sysv/linux/Dist: Add it. * sysdeps/unix/sysv/linux/Makefile [$(subdir)=sunrpc] (headers): Likewise. * sysdeps/unix/sysv/linux/local_lim.h: Rewrite. Instead of defining ourself we use a kernel header file. * sysdeps/unix/sysv/linux/i386/sysdep.h (DO_CALL): Optimize system call handler for i586. * sysdeps/unix/sysv/linux/sys/param.h: Add copyright and clean up. Sat Oct 14 02:52:36 1995 Ulrich Drepper <drepper@ipd.info.uni-karlsruhe.de> * malloc/malloc.c (_malloc_internal): Performance fix. Move if statement out of loop. * stdio/_itoa.c, stdio/_itoa.h: Complete rewrite. Much faster implementation using GMP functions. Contributed by Torbjorn Granlund and Ulrich Drepper. * stdio/test_rdwr.c: Include <errno.h>. * sysdeps/i386/i586/Implies: New file. New highly optimized string functions for i[345]86. * sysdeps/i386/memchr.S, sysdeps/i386/memcmp.S: New files. * sysdeps/i386/stpcpy.S, sysdeps/i386/stpncpy.S: New files. 
* sysdeps/i386/strchr.S, sysdeps/i386/strcspn.S: New files. * sysdeps/i386/strpbrk.S, sysdeps/i386/strrchr.S: New files. * sysdeps/i386/strspn.S, sysdeps/i386/i486/strcat.S: New files. * sysdeps/i386/i486/strlen.S, sysdeps/i386/i586/strchr.S: New files. * sysdeps/i386/i586/strlen.S: New file. * sysdeps/i386/memchr.c: Removed. There is now an assembler version. * sysdeps/i386/i586/memcopy.h (WORD_COPY_BWD): Parameters did not correspond to used values. * sysdeps/unix/sysv/linux/nfs/nfs.h: New file. Simply a wrapper around a kernel header file. * sysdeps/unix/sysv/linux/Dist: Add it. * sysdeps/unix/sysv/linux/Makefile [$(subdir)=sunrpc] (headers): Likewise. * sysdeps/unix/sysv/linux/local_lim.h: Rewrite. Instead of defining ourself we use a kernel header file. * sysdeps/unix/sysv/linux/i386/sysdep.h (DO_CALL): Optimize system call handler for i586. * sysdeps/unix/sysv/linux/sys/param.h: Add copyright and clean up.
This commit is contained in:
parent
5d82cf5c55
commit
8f5ca04bc7
@ -5,7 +5,7 @@ glibc-*
|
||||
|
||||
configparms
|
||||
|
||||
sun4 i386 i386-gnuelf hp300-netbsd hp300 i486-linux
|
||||
sun[43]* i[345]86* hp300*
|
||||
|
||||
ieeetest hppa-sysdeps regex
|
||||
|
||||
|
40
ChangeLog
40
ChangeLog
@ -1,3 +1,43 @@
|
||||
Sat Oct 14 02:52:36 1995 Ulrich Drepper <drepper@ipd.info.uni-karlsruhe.de>
|
||||
|
||||
* malloc/malloc.c (_malloc_internal): Performance fix. Move
|
||||
if statement out of loop.
|
||||
|
||||
* stdio/_itoa.c, stdio/_itoa.h: Complete rewrite. Much faster
|
||||
implementation using GMP functions. Contributed by
|
||||
Torbjorn Granlund and Ulrich Drepper.
|
||||
|
||||
* stdio/test_rdwr.c: Include <errno.h>.
|
||||
|
||||
* sysdeps/i386/i586/Implies: New file.
|
||||
|
||||
New highly optimized string functions for i[345]86.
|
||||
* sysdeps/i386/memchr.S, sysdeps/i386/memcmp.S: New files.
|
||||
* sysdeps/i386/stpcpy.S, sysdeps/i386/stpncpy.S: New files.
|
||||
* sysdeps/i386/strchr.S, sysdeps/i386/strcspn.S: New files.
|
||||
* sysdeps/i386/strpbrk.S, sysdeps/i386/strrchr.S: New files.
|
||||
* sysdeps/i386/strspn.S, sysdeps/i386/i486/strcat.S: New files.
|
||||
* sysdeps/i386/i486/strlen.S, sysdeps/i386/i586/strchr.S: New files.
|
||||
* sysdeps/i386/i586/strlen.S: New file.
|
||||
* sysdeps/i386/memchr.c: Removed. There is now an assembler version.
|
||||
|
||||
* sysdeps/i386/i586/memcopy.h (WORD_COPY_BWD): Parameters did
|
||||
not correspond to used values.
|
||||
|
||||
* sysdeps/unix/sysv/linux/nfs/nfs.h: New file. Simply a wrapper
|
||||
around a kernel header file.
|
||||
* sysdeps/unix/sysv/linux/Dist: Add it.
|
||||
* sysdeps/unix/sysv/linux/Makefile [$(subdir)=sunrpc] (headers):
|
||||
Likewise.
|
||||
|
||||
* sysdeps/unix/sysv/linux/local_lim.h: Rewrite. Instead of
|
||||
defining ourself we use a kernel header file.
|
||||
|
||||
* sysdeps/unix/sysv/linux/i386/sysdep.h (DO_CALL): Optimize system
|
||||
call handler for i586.
|
||||
|
||||
* sysdeps/unix/sysv/linux/sys/param.h: Add copyright and clean up.
|
||||
|
||||
Wed Oct 11 00:00:00 1995 Roland McGrath <roland@churchy.gnu.ai.mit.edu>
|
||||
|
||||
* sysdeps/i386/dl-machine.h (elf_machine_rel): Use +=, not =, to
|
||||
|
28
configure.in
28
configure.in
@ -82,22 +82,18 @@ changequote(,)dnl
|
||||
# Expand the configuration machine name into a subdirectory by architecture
|
||||
# type and particular chip.
|
||||
case "$machine" in
|
||||
i[345]86)
|
||||
machine=i386/$machine ;;
|
||||
sparc[6789])
|
||||
machine=sparc/$machine ;;
|
||||
m68k)
|
||||
machine=m68k/m68020 ;;
|
||||
m680?0)
|
||||
machine=m68k/$machine ;;
|
||||
m88k)
|
||||
machine=m88k/m88100 ;;
|
||||
m88???)
|
||||
machine=m88k/$machine ;;
|
||||
mips64*)
|
||||
machine=mips/mips64/$machine ;;
|
||||
mips*)
|
||||
machine=mips/$machine ;;
|
||||
a29k | am29000) machine=a29k ;;
|
||||
alpha*) machine=alpha/$machine ;;
|
||||
hppa*) machine=hppa/$machine ;;
|
||||
i[345]86) machine=i386/$machine ;;
|
||||
m680?0) machine=m68k/$machine ;;
|
||||
m68k) machine=m68k/m68020 ;;
|
||||
m88???) machine=m88k/$machine ;;
|
||||
m88k) machine=m88k/m88100 ;;
|
||||
# mips64* has to come before mips*: the first matching pattern wins.
mips64*) machine=mips/mips64/$machine ;;
|
||||
mips*) machine=mips/$machine ;;
|
||||
sparc[6789]) machine=sparc/$machine ;;
|
||||
supersparc) machine=sparc/sparc8 ;;
|
||||
esac
|
||||
|
||||
# Make sco3.2v4 become sco3.2.4 and sunos4.1.1_U1 become sunos4.1.1.U1.
|
||||
|
@ -26,7 +26,7 @@ include ../Makeconfig
|
||||
|
||||
headers = hurd.h $(interface-headers) \
|
||||
$(addprefix hurd/,fd.h id.h port.h signal.h userlink.h \
|
||||
resource.h threadvar.h)
|
||||
resource.h threadvar.h lookup.h)
|
||||
|
||||
distribute := hurdstartup.h hurdfault.h intr-rpc.defs STATUS
|
||||
|
||||
@ -44,7 +44,7 @@ routines = hurdstartup hurdinit \
|
||||
setauth \
|
||||
pid2task task2pid \
|
||||
getuids setuids getumask fchroot \
|
||||
hurdsock hurdauth invoke-trans \
|
||||
hurdsock hurdauth \
|
||||
privports \
|
||||
msgportdemux \
|
||||
fopenport \
|
||||
|
69
hurd/hurd.h
69
hurd/hurd.h
@ -77,11 +77,16 @@ extern struct hurd_port *_hurd_ports;
|
||||
extern unsigned int _hurd_nports;
|
||||
extern volatile mode_t _hurd_umask;
|
||||
|
||||
/* Shorthand macro for referencing _hurd_ports (see <hurd/port.h>). */
|
||||
/* Shorthand macro for internal library code referencing _hurd_ports (see
|
||||
<hurd/port.h>). */
|
||||
|
||||
#define __USEPORT(which, expr) \
|
||||
HURD_PORT_USE (&_hurd_ports[INIT_PORT_##which], (expr))
|
||||
|
||||
/* Function version of __USEPORT: calls OPERATE with a send right. */
|
||||
|
||||
extern error_t _hurd_ports_use (int which, error_t (*operate) (mach_port_t));
|
||||
|
||||
|
||||
/* Base address and size of the initial stack set up by the exec server.
|
||||
If using cthreads, this stack is deallocated in startup.
|
||||
@ -150,52 +155,6 @@ extern int setcttyid (mach_port_t);
|
||||
extern int __setauth (auth_t), setauth (auth_t);
|
||||
|
||||
|
||||
/* Split FILE into a directory and a name within the directory. Look up a
|
||||
port for the directory and store it in *DIR; store in *NAME a pointer
|
||||
into FILE where the name within directory begins. The directory lookup
|
||||
uses CRDIR for the root directory and CWDIR for the current directory.
|
||||
Returns zero on success or an error code. */
|
||||
|
||||
extern error_t __hurd_file_name_split (file_t crdir, file_t cwdir,
|
||||
const char *file,
|
||||
file_t *dir, char **name);
|
||||
extern error_t hurd_file_name_split (file_t crdir, file_t cwdir,
|
||||
const char *file,
|
||||
file_t *dir, char **name);
|
||||
|
||||
/* Open a port to FILE with the given FLAGS and MODE (see <fcntl.h>).
|
||||
The file lookup uses CRDIR for the root directory and CWDIR for the
|
||||
current directory. If successful, returns zero and stores the port
|
||||
to FILE in *PORT; otherwise returns an error code. */
|
||||
|
||||
extern error_t __hurd_file_name_lookup (file_t crdir, file_t cwdir,
|
||||
const char *file,
|
||||
int flags, mode_t mode,
|
||||
file_t *port);
|
||||
extern error_t hurd_file_name_lookup (file_t crdir, file_t cwdir,
|
||||
const char *filename,
|
||||
int flags, mode_t mode,
|
||||
file_t *port);
|
||||
|
||||
/* Process the values returned by `dir_lookup' et al, and loop doing
|
||||
`dir_lookup' calls until one returns FS_RETRY_NONE. CRDIR is the
|
||||
root directory used for things like symlinks to absolute file names; the
|
||||
other arguments should be those just passed to and/or returned from
|
||||
`dir_lookup', `fsys_getroot', or `file_invoke_translator'. This
|
||||
function consumes the reference in *RESULT even if it returns an error. */
|
||||
|
||||
extern error_t __hurd_file_name_lookup_retry (file_t crdir,
|
||||
enum retry_type doretry,
|
||||
char retryname[1024],
|
||||
int flags, mode_t mode,
|
||||
file_t *result);
|
||||
extern error_t hurd_file_name_lookup_retry (file_t crdir,
|
||||
enum retry_type doretry,
|
||||
char retryname[1024],
|
||||
int flags, mode_t mode,
|
||||
file_t *result);
|
||||
|
||||
|
||||
/* Split FILE into a directory and a name within the directory. The
|
||||
directory lookup uses the current root and working directory. If
|
||||
successful, stores in *NAME a pointer into FILE where the name
|
||||
@ -213,15 +172,15 @@ extern file_t file_name_split (const char *file, char **name);
|
||||
extern file_t __file_name_lookup (const char *file, int flags, mode_t mode);
|
||||
extern file_t file_name_lookup (const char *file, int flags, mode_t mode);
|
||||
|
||||
/* Invoke any translator set on the node FILE represents, and return in
|
||||
*TRANSLATED a port to the translated node. FLAGS are as for
|
||||
`dir_lookup' et al, but the returned port will not necessarily have
|
||||
any more access rights than FILE does. */
|
||||
/* Open a port to FILE with the given FLAGS and MODE (see <fcntl.h>). The
|
||||
file lookup uses the current root directory, but uses STARTDIR as the
|
||||
"working directory" for file relative names. Returns a port to the file
|
||||
if successful; otherwise sets `errno' and returns MACH_PORT_NULL. */
|
||||
|
||||
extern error_t __hurd_invoke_translator (file_t file, int flags,
|
||||
file_t *translated);
|
||||
extern error_t hurd_invoke_translator (file_t file, int flags,
|
||||
file_t *translated);
|
||||
extern file_t __file_name_lookup_under (file_t startdir, const char *file,
|
||||
int flags, mode_t mode);
|
||||
extern file_t file_name_lookup_under (file_t startdir, const char *file,
|
||||
int flags, mode_t mode);
|
||||
|
||||
|
||||
/* Open a file descriptor on a port. FLAGS are as for `open'; flags
|
||||
|
@ -31,6 +31,12 @@ struct hurd_port *_hurd_ports;
|
||||
unsigned int _hurd_nports;
|
||||
mode_t _hurd_umask;
|
||||
|
||||
/* Evaluate (*OPERATE) (port) inside HURD_PORT_USE for the port cell
   _hurd_ports[WHICH] and return the value of that expression.
   `port' is not declared in this function; presumably HURD_PORT_USE binds
   it to the port held in the cell -- confirm against <hurd/port.h>.
   WHICH is used as an array index without any range check here.  */
error_t
|
||||
_hurd_ports_use (int which, error_t (*operate) (mach_port_t))
|
||||
{
|
||||
return HURD_PORT_USE (&_hurd_ports[which], (*operate) (port));
|
||||
}
|
||||
|
||||
void _hurd_proc_init (char **argv);
|
||||
|
||||
DEFINE_HOOK (_hurd_subinit, (void));
|
||||
|
405
stdio/_itoa.c
405
stdio/_itoa.c
@ -1,6 +1,8 @@
|
||||
/* Internal function for converting integers to ASCII.
|
||||
Copyright (C) 1994, 1995 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Torbjorn Granlund <tege@matematik.su.se>
|
||||
and Ulrich Drepper <drepper@gnu.ai.mit.edu>.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public License as
|
||||
@ -17,13 +19,400 @@ License along with the GNU C Library; see the file COPYING.LIB. If
|
||||
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
|
||||
Cambridge, MA 02139, USA. */
|
||||
|
||||
/* Lower-case digits. */
|
||||
const char _itoa_lower_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
|
||||
/* Upper-case digits. */
|
||||
const char _itoa_upper_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
|
||||
|
||||
/* Cause _itoa.h to define _itoa as a real function instead of an
|
||||
`extern inline'. */
|
||||
#define _EXTERN_INLINE /* empty */
|
||||
#include <gmp-mparam.h>
|
||||
#include "../stdlib/gmp.h"
|
||||
#include "../stdlib/gmp-impl.h"
|
||||
#include "../stdlib/longlong.h"
|
||||
|
||||
#include "_itoa.h"
|
||||
|
||||
|
||||
/* Canonize environment. For some architectures not all values might
|
||||
be defined in the GMP header files. */
|
||||
#ifndef UMUL_TIME
|
||||
# define UMUL_TIME 1
|
||||
#endif
|
||||
#ifndef UDIV_TIME
|
||||
# define UDIV_TIME 1
|
||||
#endif
|
||||
|
||||
/* Control memory layout. */
|
||||
#ifdef PACK
|
||||
# undef PACK
|
||||
# define PACK __attribute__ ((packed))
|
||||
#else
|
||||
# define PACK
|
||||
#endif
|
||||
|
||||
|
||||
/* Declare local types. */
|
||||
/* Per-base conversion constants.  _itoa fetches the record for BASE from
   base_table[BASE - 2].  The initialisers in base_table are positional,
   so the member order and the preprocessor conditions below must stay in
   step with that table.

   base_multiplier  Multiplier passed to umul_ppmm together with the number
		    being divided.  Only present when
		    UDIV_TIME > 2 * UMUL_TIME.
   flag		    Nonzero selects the quotient formula
		    (x + ((v - x) >> 1)) >> (post_shift - 1); zero selects
		    x >> post_shift, where x is the first result of
		    umul_ppmm and v the number being divided.
   post_shift	    Shift count used by those two formulas.

   big (only when BITS_PER_MP_LIMB == 32):
     normalization_steps  Left shift applied to `base' and to the dividend
			  before a normalised division.
     ndigits		  Digit count every chunk except the most
			  significant one is zero-padded to.
     base		  Divisor used to cut a 64-bit value into up to
			  three limb-sized chunks (e.g. 0x3b9aca00 with
			  ndigits 9 for base 10).
     base_ninv		  Passed as the DI argument of udiv_qrnnd_preinv.
			  Only present when UDIV_TIME > 2 * UMUL_TIME.

   PACK expands to __attribute__ ((packed)) if PACK was already defined
   when this file is compiled, and to nothing otherwise.  */
struct base_table_t
|
||||
{
|
||||
#if (UDIV_TIME > 2 * UMUL_TIME)
|
||||
mp_limb base_multiplier;
|
||||
#endif
|
||||
char flag;
|
||||
char post_shift;
|
||||
#if BITS_PER_MP_LIMB == 32
|
||||
struct
|
||||
{
|
||||
char normalization_steps;
|
||||
char ndigits;
|
||||
mp_limb base PACK;
|
||||
#if UDIV_TIME > 2 * UMUL_TIME
|
||||
mp_limb base_ninv PACK;
|
||||
#endif
|
||||
} big;
|
||||
#endif
|
||||
};
|
||||
|
||||
/* To reduce the memory needed we include some fields of the tables
|
||||
only conditionally. */
|
||||
#if BITS_PER_MP_LIMB == 32
|
||||
# if UDIV_TIME > 2 * UMUL_TIME
|
||||
# define SEL1(X) X,
|
||||
# define SEL2(X) ,X
|
||||
# else
|
||||
# define SEL1(X)
|
||||
# define SEL2(X)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
/* Local variables. */
|
||||
static const struct base_table_t base_table[] =
|
||||
{
|
||||
#if BITS_PER_MP_LIMB == 64
|
||||
/* 2 */ {0ul, 1, 1},
|
||||
/* 3 */ {0xaaaaaaaaaaaaaaabul, 0, 1},
|
||||
/* 4 */ {0ul, 1, 2},
|
||||
/* 5 */ {0xcccccccccccccccdul, 0, 2},
|
||||
/* 6 */ {0xaaaaaaaaaaaaaaabul, 0, 2},
|
||||
/* 7 */ {0x2492492492492493ul, 1, 3},
|
||||
/* 8 */ {0ul, 1, 3},
|
||||
/* 9 */ {0xe38e38e38e38e38ful, 0, 3},
|
||||
/* 10 */ {0xcccccccccccccccdul, 0, 3},
|
||||
/* 11 */ {0x2e8ba2e8ba2e8ba3ul, 0, 1},
|
||||
/* 12 */ {0xaaaaaaaaaaaaaaabul, 0, 3},
|
||||
/* 13 */ {0x4ec4ec4ec4ec4ec5ul, 0, 2},
|
||||
/* 14 */ {0x2492492492492493ul, 1, 4},
|
||||
/* 15 */ {0x8888888888888889ul, 0, 3},
|
||||
/* 16 */ {0ul, 1, 4},
|
||||
/* 17 */ {0xf0f0f0f0f0f0f0f1ul, 0, 4},
|
||||
/* 18 */ {0xe38e38e38e38e38ful, 0, 4},
|
||||
/* 19 */ {0xd79435e50d79435ful, 0, 4},
|
||||
/* 20 */ {0xcccccccccccccccdul, 0, 4},
|
||||
/* 21 */ {0x8618618618618619ul, 1, 5},
|
||||
/* 22 */ {0x2e8ba2e8ba2e8ba3ul, 0, 2},
|
||||
/* 23 */ {0x642c8590b21642c9ul, 1, 5},
|
||||
/* 24 */ {0xaaaaaaaaaaaaaaabul, 0, 4},
|
||||
/* 25 */ {0x47ae147ae147ae15ul, 1, 5},
|
||||
/* 26 */ {0x4ec4ec4ec4ec4ec5ul, 0, 3},
|
||||
/* 27 */ {0x97b425ed097b425ful, 0, 4},
|
||||
/* 28 */ {0x2492492492492493ul, 1, 5},
|
||||
/* 29 */ {0x1a7b9611a7b9611bul, 1, 5},
|
||||
/* 30 */ {0x8888888888888889ul, 0, 4},
|
||||
/* 31 */ {0x0842108421084211ul, 1, 5},
|
||||
/* 32 */ {0ul, 1, 5},
|
||||
/* 33 */ {0x0f83e0f83e0f83e1ul, 0, 1},
|
||||
/* 34 */ {0xf0f0f0f0f0f0f0f1ul, 0, 5},
|
||||
/* 35 */ {0xea0ea0ea0ea0ea0ful, 0, 5},
|
||||
/* 36 */ {0xe38e38e38e38e38ful, 0, 5}
|
||||
#endif
|
||||
#if BITS_PER_MP_LIMB == 32
|
||||
/* 2 */ {SEL1(0ul) 1, 1, {0, 31, 0x80000000ul SEL2(0xfffffffful)}},
|
||||
/* 3 */ {SEL1(0xaaaaaaabul) 0, 1, {0, 20, 0xcfd41b91ul SEL2(0x3b563c24ul)}},
|
||||
/* 4 */ {SEL1(0ul) 1, 2, {1, 15, 0x40000000ul SEL2(0xfffffffful)}},
|
||||
/* 5 */ {SEL1(0xcccccccdul) 0, 2, {1, 13, 0x48c27395ul SEL2(0xc25c2684ul)}},
|
||||
/* 6 */ {SEL1(0xaaaaaaabul) 0, 2, {0, 12, 0x81bf1000ul SEL2(0xf91bd1b6ul)}},
|
||||
/* 7 */ {SEL1(0x24924925ul) 1, 3, {1, 11, 0x75db9c97ul SEL2(0x1607a2cbul)}},
|
||||
/* 8 */ {SEL1(0ul) 1, 3, {1, 10, 0x40000000ul SEL2(0xfffffffful)}},
|
||||
/* 9 */ {SEL1(0x38e38e39ul) 0, 1, {0, 10, 0xcfd41b91ul SEL2(0x3b563c24ul)}},
|
||||
/* 10 */ {SEL1(0xcccccccdul) 0, 3, {2, 9, 0x3b9aca00ul SEL2(0x12e0be82ul)}},
|
||||
/* 11 */ {SEL1(0xba2e8ba3ul) 0, 3, {0, 9, 0x8c8b6d2bul SEL2(0xd24cde04ul)}},
|
||||
/* 12 */ {SEL1(0xaaaaaaabul) 0, 3, {3, 8, 0x19a10000ul SEL2(0x3fa39ab5ul)}},
|
||||
/* 13 */ {SEL1(0x4ec4ec4ful) 0, 2, {2, 8, 0x309f1021ul SEL2(0x50f8ac5ful)}},
|
||||
/* 14 */ {SEL1(0x24924925ul) 1, 4, {1, 8, 0x57f6c100ul SEL2(0x74843b1eul)}},
|
||||
/* 15 */ {SEL1(0x88888889ul) 0, 3, {0, 8, 0x98c29b81ul SEL2(0xad0326c2ul)}},
|
||||
/* 16 */ {SEL1(0ul) 1, 4, {3, 7, 0x10000000ul SEL2(0xfffffffful)}},
|
||||
/* 17 */ {SEL1(0xf0f0f0f1ul) 0, 4, {3, 7, 0x18754571ul SEL2(0x4ef0b6bdul)}},
|
||||
/* 18 */ {SEL1(0x38e38e39ul) 0, 2, {2, 7, 0x247dbc80ul SEL2(0xc0fc48a1ul)}},
|
||||
/* 19 */ {SEL1(0xaf286bcbul) 1, 5, {2, 7, 0x3547667bul SEL2(0x33838942ul)}},
|
||||
/* 20 */ {SEL1(0xcccccccdul) 0, 4, {1, 7, 0x4c4b4000ul SEL2(0xad7f29abul)}},
|
||||
/* 21 */ {SEL1(0x86186187ul) 1, 5, {1, 7, 0x6b5a6e1dul SEL2(0x313c3d15ul)}},
|
||||
/* 22 */ {SEL1(0xba2e8ba3ul) 0, 4, {0, 7, 0x94ace180ul SEL2(0xb8cca9e0ul)}},
|
||||
/* 23 */ {SEL1(0xb21642c9ul) 0, 4, {0, 7, 0xcaf18367ul SEL2(0x42ed6de9ul)}},
|
||||
/* 24 */ {SEL1(0xaaaaaaabul) 0, 4, {4, 6, 0x0b640000ul SEL2(0x67980e0bul)}},
|
||||
/* 25 */ {SEL1(0x51eb851ful) 0, 3, {4, 6, 0x0e8d4a51ul SEL2(0x19799812ul)}},
|
||||
/* 26 */ {SEL1(0x4ec4ec4ful) 0, 3, {3, 6, 0x1269ae40ul SEL2(0xbce85396ul)}},
|
||||
/* 27 */ {SEL1(0x2f684bdbul) 1, 5, {3, 6, 0x17179149ul SEL2(0x62c103a9ul)}},
|
||||
/* 28 */ {SEL1(0x24924925ul) 1, 5, {3, 6, 0x1cb91000ul SEL2(0x1d353d43ul)}},
|
||||
/* 29 */ {SEL1(0x8d3dcb09ul) 0, 4, {2, 6, 0x23744899ul SEL2(0xce1deceaul)}},
|
||||
/* 30 */ {SEL1(0x88888889ul) 0, 4, {2, 6, 0x2b73a840ul SEL2(0x790fc511ul)}},
|
||||
/* 31 */ {SEL1(0x08421085ul) 1, 5, {2, 6, 0x34e63b41ul SEL2(0x35b865a0ul)}},
|
||||
/* 32 */ {SEL1(0ul) 1, 5, {1, 6, 0x40000000ul SEL2(0xfffffffful)}},
|
||||
/* 33 */ {SEL1(0x3e0f83e1ul) 0, 3, {1, 6, 0x4cfa3cc1ul SEL2(0xa9aed1b3ul)}},
|
||||
/* 34 */ {SEL1(0xf0f0f0f1ul) 0, 5, {1, 6, 0x5c13d840ul SEL2(0x63dfc229ul)}},
|
||||
/* 35 */ {SEL1(0xd41d41d5ul) 1, 6, {1, 6, 0x6d91b519ul SEL2(0x2b0fee30ul)}},
|
||||
/* 36 */ {SEL1(0x38e38e39ul) 0, 3, {0, 6, 0x81bf1000ul SEL2(0xf91bd1b6ul)}}
|
||||
#endif
|
||||
};
|
||||
|
||||
/* Lower-case digits. */
|
||||
static const char _itoa_lower_digits[]
|
||||
= "0123456789abcdefghijklmnopqrstuvwxyz";
|
||||
/* Upper-case digits. */
|
||||
static const char _itoa_upper_digits[]
|
||||
= "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
|
||||
|
||||
|
||||
char *
|
||||
_itoa (value, buflim, base, upper_case)
|
||||
unsigned long long int value;
|
||||
char *buflim;
|
||||
unsigned int base;
|
||||
int upper_case;
|
||||
{
|
||||
const char *digits = upper_case ? _itoa_upper_digits : _itoa_lower_digits;
|
||||
char *bp = buflim;
|
||||
const struct base_table_t *brec = &base_table[base - 2];
|
||||
|
||||
switch (base)
|
||||
{
|
||||
#define RUN_2N(BITS) \
|
||||
do \
|
||||
{ \
|
||||
/* `unsigned long long int' always has 64 bits. */ \
|
||||
mp_limb work_hi = value >> (64 - BITS_PER_MP_LIMB); \
|
||||
\
|
||||
if (BITS_PER_MP_LIMB == 32) \
|
||||
if (work_hi != 0) \
|
||||
{ \
|
||||
mp_limb work_lo; \
|
||||
int cnt; \
|
||||
\
|
||||
work_lo = value & 0xfffffffful; \
|
||||
for (cnt = BITS_PER_MP_LIMB / BITS; cnt > 0; --cnt) \
|
||||
{ \
|
||||
*--bp = digits[work_lo & ((1ul << BITS) - 1)]; \
|
||||
work_lo >>= BITS; \
|
||||
} \
|
||||
if (BITS_PER_MP_LIMB % BITS != 0) \
|
||||
{ \
|
||||
work_lo |= ((work_hi \
|
||||
& ((1 << BITS - BITS_PER_MP_LIMB % BITS) \
|
||||
- 1)) \
|
||||
<< BITS_PER_MP_LIMB % BITS); \
|
||||
*--bp = digits[work_lo]; \
|
||||
work_hi >>= BITS - BITS_PER_MP_LIMB % BITS; \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
work_hi = value & 0xfffffffful; \
|
||||
do \
|
||||
{ \
|
||||
*--bp = digits[work_hi & ((1 << BITS) - 1)]; \
|
||||
work_hi >>= BITS; \
|
||||
} \
|
||||
while (work_hi != 0); \
|
||||
} \
|
||||
while (0)
|
||||
case 8:
|
||||
RUN_2N (3);
|
||||
break;
|
||||
|
||||
case 16:
|
||||
RUN_2N (4);
|
||||
break;
|
||||
|
||||
default:
|
||||
{
|
||||
#if BITS_PER_MP_LIMB == 64
|
||||
mp_limb base_multiplier = brec->base_multiplier;
|
||||
if (brec->flag)
|
||||
while (value != 0)
|
||||
{
|
||||
mp_limb quo, rem, x, dummy;
|
||||
|
||||
umul_ppmm (x, dummy, value, base_multiplier);
|
||||
quo = (x + ((value - x) >> 1)) >> (brec->post_shift - 1);
|
||||
rem = value - quo * base;
|
||||
*--bp = digits[rem];
|
||||
value = quo;
|
||||
}
|
||||
else
|
||||
while (value != 0)
|
||||
{
|
||||
mp_limb quo, rem, x, dummy;
|
||||
|
||||
umul_ppmm (x, dummy, value, base_multiplier);
|
||||
quo = x >> brec->post_shift;
|
||||
rem = value - quo * base;
|
||||
*--bp = digits[rem];
|
||||
value = quo;
|
||||
}
|
||||
#endif
|
||||
#if BITS_PER_MP_LIMB == 32
|
||||
mp_limb t[3];
|
||||
int n;
|
||||
|
||||
/* First convert x0 to 1-3 words in base s->big.base.
|
||||
Optimize for frequent cases of 32 bit numbers. */
|
||||
if ((mp_limb) (value >> 32) >= 1)
|
||||
{
|
||||
int big_normalization_steps = brec->big.normalization_steps;
|
||||
mp_limb big_base_norm = brec->big.base << big_normalization_steps;
|
||||
|
||||
if ((mp_limb) (value >> 32) >= brec->big.base)
|
||||
{
|
||||
mp_limb x1hi, x1lo, r;
|
||||
/* If you want to optimize this, take advantage of
|
||||
that the quotient in the first udiv_qrnnd will
|
||||
always be very small. It might be faster just to
|
||||
subtract in a tight loop. */
|
||||
|
||||
#if UDIV_TIME > 2 * UMUL_TIME
|
||||
mp_limb x, xh, xl;
|
||||
|
||||
if (big_normalization_steps == 0)
|
||||
xh = 0;
|
||||
else
|
||||
xh = (mp_limb) (value >> 64 - big_normalization_steps);
|
||||
xl = (mp_limb) (value >> 32 - big_normalization_steps);
|
||||
udiv_qrnnd_preinv (x1hi, r, xh, xl, big_base_norm,
|
||||
brec->big.base_ninv);
|
||||
|
||||
xl = ((mp_limb) value) << big_normalization_steps;
|
||||
udiv_qrnnd_preinv (x1lo, x, r, xl, big_base_norm,
|
||||
big_normalization_steps);
|
||||
t[2] = x >> big_normalization_steps;
|
||||
|
||||
if (big_normalization_steps == 0)
|
||||
xh = x1hi;
|
||||
else
|
||||
xh = ((x1hi << big_normalization_steps)
|
||||
| (x1lo >> 32 - big_normalization_steps));
|
||||
xl = x1lo << big_normalization_steps;
|
||||
udiv_qrnnd_preinv (t[0], x, xh, xl, big_base_norm,
|
||||
big_normalization_steps);
|
||||
t[1] = x >> big_normalization_steps;
|
||||
#elif UDIV_NEEDS_NORMALIZATION
|
||||
mp_limb x, xh, xl;
|
||||
|
||||
if (big_normalization_steps == 0)
|
||||
xh = 0;
|
||||
else
|
||||
xh = (mp_limb) (value >> 64 - big_normalization_steps);
|
||||
xl = (mp_limb) (value >> 32 - big_normalization_steps);
|
||||
udiv_qrnnd (x1hi, r, xh, xl, big_base_norm);
|
||||
|
||||
xl = ((mp_limb) value) << big_normalization_steps;
|
||||
udiv_qrnnd (x1lo, x, r, xl, big_base_norm);
|
||||
t[2] = x >> big_normalization_steps;
|
||||
|
||||
if (big_normalization_steps == 0)
|
||||
xh = x1hi;
|
||||
else
|
||||
xh = ((x1hi << big_normalization_steps)
|
||||
| (x1lo >> 32 - big_normalization_steps));
|
||||
xl = x1lo << big_normalization_steps;
|
||||
udiv_qrnnd (t[0], x, xh, xl, big_base_norm);
|
||||
t[1] = x >> big_normalization_steps;
|
||||
#else
|
||||
udiv_qrnnd (x1hi, r, 0, (mp_limb) (value >> 32),
|
||||
brec->big.base);
|
||||
udiv_qrnnd (x1lo, t[2], r, (mp_limb) value, brec->big.base);
|
||||
udiv_qrnnd (t[0], t[1], x1hi, x1lo, brec->big.base);
|
||||
#endif
|
||||
n = 3;
|
||||
}
|
||||
else
|
||||
{
|
||||
#if (UDIV_TIME > 2 * UMUL_TIME)
|
||||
mp_limb x;
|
||||
|
||||
value <<= brec->big.normalization_steps;
|
||||
udiv_qrnnd_preinv (t[0], x, (mp_limb) (value >> 32),
|
||||
(mp_limb) value, big_base_norm,
|
||||
brec->big.base_ninv);
|
||||
t[1] = x >> brec->big.normalization_steps;
|
||||
#elif UDIV_NEEDS_NORMALIZATION
|
||||
mp_limb x;
|
||||
|
||||
value <<= big_normalization_steps;
|
||||
udiv_qrnnd (t[0], x, (mp_limb) (value >> 32),
|
||||
(mp_limb) value, big_base_norm);
|
||||
t[1] = x >> big_normalization_steps;
|
||||
#else
|
||||
udiv_qrnnd (t[0], t[1], (mp_limb) (value >> 32),
|
||||
(mp_limb) value, brec->big.base);
|
||||
#endif
|
||||
n = 2;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
t[0] = value;
|
||||
n = 1;
|
||||
}
|
||||
|
||||
/* Convert the 1-3 words in t[], word by word, to ASCII. */
|
||||
do
|
||||
{
|
||||
mp_limb ti = t[--n];
|
||||
int ndig_for_this_limb = 0;
|
||||
|
||||
#if UDIV_TIME > 2 * UMUL_TIME
|
||||
mp_limb base_multiplier = brec->base_multiplier;
|
||||
if (brec->flag)
|
||||
while (ti != 0)
|
||||
{
|
||||
mp_limb quo, rem, x, dummy;
|
||||
|
||||
umul_ppmm (x, dummy, ti, base_multiplier);
|
||||
quo = (x + ((ti - x) >> 1)) >> (brec->post_shift - 1);
|
||||
rem = ti - quo * base;
|
||||
*--bp = digits[rem];
|
||||
ti = quo;
|
||||
++ndig_for_this_limb;
|
||||
}
|
||||
else
|
||||
while (ti != 0)
|
||||
{
|
||||
mp_limb quo, rem, x, dummy;
|
||||
|
||||
umul_ppmm (x, dummy, ti, base_multiplier);
|
||||
quo = x >> brec->post_shift;
|
||||
rem = ti - quo * base;
|
||||
*--bp = digits[rem];
|
||||
ti = quo;
|
||||
++ndig_for_this_limb;
|
||||
}
|
||||
#else
|
||||
while (ti != 0)
|
||||
{
|
||||
mp_limb quo, rem;
|
||||
|
||||
quo = ti / base;
|
||||
rem = ti % base;
|
||||
*--bp = digits[rem];
|
||||
ti = quo;
|
||||
++ndig_for_this_limb;
|
||||
}
|
||||
#endif
|
||||
/* If this wasn't the most significant word, pad with zeros. */
|
||||
if (n != 0)
|
||||
while (ndig_for_this_limb < brec->big.ndigits)
|
||||
{
|
||||
*--bp = '0';
|
||||
++ndig_for_this_limb;
|
||||
}
|
||||
}
|
||||
while (n != 0);
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
return bp;
|
||||
}
|
||||
|
@ -21,8 +21,6 @@ Cambridge, MA 02139, USA. */
|
||||
#define _ITOA_H
|
||||
#include <sys/cdefs.h>
|
||||
|
||||
extern const char _itoa_lower_digits[], _itoa_upper_digits[];
|
||||
|
||||
/* Convert VALUE into ASCII in base BASE (2..36).
|
||||
Write backwards starting the character just before BUFLIM.
|
||||
Return the address of the first (left-to-right) character in the number.
|
||||
@ -31,28 +29,4 @@ extern const char _itoa_lower_digits[], _itoa_upper_digits[];
|
||||
extern char *_itoa __P ((unsigned long long int value, char *buflim,
|
||||
unsigned int base, int upper_case));
|
||||
|
||||
#ifndef _EXTERN_INLINE
|
||||
#define _EXTERN_INLINE extern __inline
|
||||
#endif
|
||||
|
||||
_EXTERN_INLINE
|
||||
char *
|
||||
_itoa (unsigned long long int value, char *buflim,
|
||||
unsigned int base, int upper_case)
|
||||
{
|
||||
/* Base-36 digits for numbers. */
|
||||
const char *digits = upper_case ? _itoa_upper_digits : _itoa_lower_digits;
|
||||
|
||||
register char *bp = buflim;
|
||||
|
||||
while (value > 0)
|
||||
{
|
||||
*--bp = digits[value % base];
|
||||
value /= base;
|
||||
}
|
||||
|
||||
return bp;
|
||||
}
|
||||
|
||||
|
||||
#endif /* itoa.h */
|
||||
|
@ -17,6 +17,7 @@ not, write to the Free Software Foundation, Inc., 675 Mass Ave,
|
||||
Cambridge, MA 02139, USA. */
|
||||
|
||||
#include <ansidecl.h>
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
@ -19,11 +19,17 @@ along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||
|
||||
#if ! defined (alloca)
|
||||
#if defined (__GNUC__) || defined (__sparc__) || defined (sparc)
|
||||
#if defined (__GNUC__)
|
||||
#define alloca __builtin_alloca
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if ! defined (alloca)
|
||||
#if defined (__sparc__) || defined (sparc) || defined (__sgi)
|
||||
#include <alloca.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef NULL
|
||||
#define NULL 0L
|
||||
#endif
|
||||
@ -168,6 +174,7 @@ void _mp_default_free ();
|
||||
else \
|
||||
____mpn_sqr_n (prodp, up, size, tspace); \
|
||||
} while (0);
|
||||
#define assert(trueval) do {if (!(trueval)) abort ();} while (0)
|
||||
|
||||
/* Structure for conversion between internal binary format and
|
||||
strings in base 2..36. */
|
||||
@ -197,9 +204,11 @@ struct bases
|
||||
extern const struct bases __mp_bases[];
|
||||
extern mp_size_t __gmp_default_fp_limb_precision;
|
||||
|
||||
/* Divide the two-limb number in (NH,,NL) by D, with DI being a 32 bit
|
||||
approximation to (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
|
||||
Put the quotient in Q and the remainder in R. */
|
||||
/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
|
||||
limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
|
||||
If this would yield overflow, DI should be the largest possible number
|
||||
(i.e., only ones). For correct operation, the most significant bit of D
|
||||
has to be set. Put the quotient in Q and the remainder in R. */
|
||||
#define udiv_qrnnd_preinv(q, r, nh, nl, d, di) \
|
||||
do { \
|
||||
mp_limb _q, _ql, _r; \
|
||||
@ -226,6 +235,8 @@ extern mp_size_t __gmp_default_fp_limb_precision;
|
||||
(r) = _r; \
|
||||
(q) = _q; \
|
||||
} while (0)
|
||||
/* Like udiv_qrnnd_preinv, but for any value D. DNORM is D shifted left
|
||||
so that its most significant bit is set. LGUP is ceil(log2(D)). */
|
||||
#define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup) \
|
||||
do { \
|
||||
mp_limb n2, n10, n1, nadj, q1; \
|
||||
@ -243,6 +254,8 @@ extern mp_size_t __gmp_default_fp_limb_precision;
|
||||
(r) = _xl + ((d) & _xh); \
|
||||
(q) = _xh - q1; \
|
||||
} while (0)
|
||||
/* Exactly like udiv_qrnnd_preinv, but branch-free. It is not clear which
|
||||
version to use. */
|
||||
#define udiv_qrnnd_preinv2norm(q, r, nh, nl, d, di) \
|
||||
do { \
|
||||
mp_limb n2, n10, n1, nadj, q1; \
|
||||
@ -262,22 +275,49 @@ extern mp_size_t __gmp_default_fp_limb_precision;
|
||||
} while (0)
|
||||
|
||||
#if defined (__GNUC__)
|
||||
/* Define stuff for longlong.h asm macros. */
|
||||
#if __GNUC_NEW_ATTR_MODE_SYNTAX
|
||||
typedef unsigned int UQItype __attribute__ ((mode ("QI")));
|
||||
typedef int SItype __attribute__ ((mode ("SI")));
|
||||
typedef unsigned int USItype __attribute__ ((mode ("SI")));
|
||||
typedef int DItype __attribute__ ((mode ("DI")));
|
||||
typedef unsigned int UDItype __attribute__ ((mode ("DI")));
|
||||
#else
|
||||
/* Define stuff for longlong.h. */
|
||||
typedef unsigned int UQItype __attribute__ ((mode (QI)));
|
||||
typedef int SItype __attribute__ ((mode (SI)));
|
||||
typedef unsigned int USItype __attribute__ ((mode (SI)));
|
||||
typedef int DItype __attribute__ ((mode (DI)));
|
||||
typedef unsigned int UDItype __attribute__ ((mode (DI)));
|
||||
#endif
|
||||
#else
|
||||
typedef unsigned char UQItype;
|
||||
typedef long SItype;
|
||||
typedef unsigned long USItype;
|
||||
#endif
|
||||
|
||||
typedef mp_limb UWtype;
|
||||
typedef unsigned int UHWtype;
|
||||
#define W_TYPE_SIZE BITS_PER_MP_LIMB
|
||||
|
||||
|
||||
#ifndef IEEE_DOUBLE_BIG_ENDIAN
|
||||
#define IEEE_DOUBLE_BIG_ENDIAN 1
|
||||
#endif
|
||||
|
||||
#if IEEE_DOUBLE_BIG_ENDIAN
|
||||
union ieee_double_extract
|
||||
{
|
||||
struct
|
||||
{
|
||||
unsigned long sig:1;
|
||||
unsigned long exp:11;
|
||||
unsigned long manh:20;
|
||||
unsigned long manl:32;
|
||||
} s;
|
||||
double d;
|
||||
};
|
||||
#else
|
||||
union ieee_double_extract
|
||||
{
|
||||
struct
|
||||
{
|
||||
unsigned long manl:32;
|
||||
unsigned long manh:20;
|
||||
unsigned long exp:11;
|
||||
unsigned long sig:1;
|
||||
} s;
|
||||
double d;
|
||||
};
|
||||
#endif
|
||||
|
40
stdlib/gmp.h
40
stdlib/gmp.h
@ -24,13 +24,13 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||
#define __need_size_t
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef __STDC__
|
||||
#if defined (__STDC__)
|
||||
#define __gmp_const const
|
||||
#else
|
||||
#define __gmp_const
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
#if defined (__GNUC__)
|
||||
#define __gmp_inline inline
|
||||
#else
|
||||
#define __gmp_inline
|
||||
@ -40,9 +40,14 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||
typedef unsigned int mp_limb;
|
||||
typedef int mp_limb_signed;
|
||||
#else
|
||||
#if _LONG_LONG_LIMB
|
||||
typedef unsigned long long int mp_limb;
|
||||
typedef long long int mp_limb_signed;
|
||||
#else
|
||||
typedef unsigned long int mp_limb;
|
||||
typedef long int mp_limb_signed;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
typedef mp_limb * mp_ptr;
|
||||
typedef __gmp_const mp_limb * mp_srcptr;
|
||||
@ -52,9 +57,9 @@ typedef long int mp_exp_t;
|
||||
#ifndef __MP_SMALL__
|
||||
typedef struct
|
||||
{
|
||||
long int alloc; /* Number of *limbs* allocated and pointed
|
||||
mp_size_t alloc; /* Number of *limbs* allocated and pointed
|
||||
to by the D field. */
|
||||
long int size; /* abs(SIZE) is the number of limbs
|
||||
mp_size_t size; /* abs(SIZE) is the number of limbs
|
||||
the last field points to. If SIZE
|
||||
is negative this is a negative
|
||||
number. */
|
||||
@ -130,12 +135,16 @@ typedef __mpf_struct *mpf_ptr;
|
||||
typedef __gmp_const __mpq_struct *mpq_srcptr;
|
||||
typedef __mpq_struct *mpq_ptr;
|
||||
|
||||
#ifdef __STDC__
|
||||
#if defined (__STDC__)
|
||||
#define _PROTO(x) x
|
||||
#else
|
||||
#define _PROTO(x) ()
|
||||
#endif
|
||||
|
||||
#if defined (FILE) || defined (_STDIO_H_) || defined (__STDIO_H__) || defined (H_STDIO)
|
||||
#define _GMP_H_HAVE_FILE 1
|
||||
#endif
|
||||
|
||||
void mp_set_memory_functions _PROTO((void *(*) (size_t),
|
||||
void *(*) (void *, size_t, size_t),
|
||||
void (*) (void *, size_t)));
|
||||
@ -165,7 +174,7 @@ unsigned long int mpz_get_ui _PROTO ((mpz_srcptr));
|
||||
mp_limb mpz_getlimbn _PROTO ((mpz_srcptr, mp_size_t));
|
||||
mp_size_t mpz_hamdist _PROTO ((mpz_srcptr, mpz_srcptr));
|
||||
void mpz_init _PROTO ((mpz_ptr));
|
||||
#ifdef FILE
|
||||
#ifdef _GMP_H_HAVE_FILE
|
||||
void mpz_inp_raw _PROTO ((mpz_ptr, FILE *));
|
||||
int mpz_inp_str _PROTO ((mpz_ptr, FILE *, int));
|
||||
#endif
|
||||
@ -180,7 +189,7 @@ void mpz_mul _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
|
||||
void mpz_mul_2exp _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
|
||||
void mpz_mul_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
|
||||
void mpz_neg _PROTO ((mpz_ptr, mpz_srcptr));
|
||||
#ifdef FILE
|
||||
#ifdef _GMP_H_HAVE_FILE
|
||||
void mpz_out_raw _PROTO ((FILE *, mpz_srcptr));
|
||||
void mpz_out_str _PROTO ((FILE *, int, mpz_srcptr));
|
||||
#endif
|
||||
@ -218,6 +227,8 @@ void mpz_tdiv_qr_ui _PROTO((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int));
|
||||
void mpz_tdiv_r _PROTO((mpz_ptr, mpz_srcptr, mpz_srcptr));
|
||||
void mpz_tdiv_r_ui _PROTO((mpz_ptr, mpz_srcptr, unsigned long int));
|
||||
|
||||
void mpz_array_init (mpz_ptr, size_t, mp_size_t);
|
||||
|
||||
/**************** Rational (i.e. Q) routines. ****************/
|
||||
|
||||
void mpq_init _PROTO ((mpq_ptr));
|
||||
@ -253,7 +264,7 @@ void mpf_dump _PROTO ((mpf_srcptr));
|
||||
char *mpf_get_str _PROTO ((char *, mp_exp_t *, int, size_t, mpf_srcptr));
|
||||
void mpf_init _PROTO ((mpf_ptr));
|
||||
void mpf_init2 _PROTO ((mpf_ptr, mp_size_t));
|
||||
#ifdef FILE
|
||||
#ifdef _GMP_H_HAVE_FILE
|
||||
void mpf_inp_str _PROTO ((mpf_ptr, FILE *, int));
|
||||
#endif
|
||||
void mpf_init_set _PROTO ((mpf_ptr, mpf_srcptr));
|
||||
@ -265,7 +276,7 @@ void mpf_mul _PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
|
||||
void mpf_mul_2exp _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
|
||||
void mpf_mul_ui _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
|
||||
void mpf_neg _PROTO ((mpf_ptr, mpf_srcptr));
|
||||
#ifdef FILE
|
||||
#ifdef _GMP_H_HAVE_FILE
|
||||
void mpf_out_str _PROTO ((mpf_ptr, int, size_t, FILE *));
|
||||
#endif
|
||||
void mpf_set _PROTO ((mpf_ptr, mpf_srcptr));
|
||||
@ -335,7 +346,7 @@ mp_limb __mpn_gcd_1 _PROTO ((mp_srcptr, mp_size_t, mp_limb));
|
||||
|
||||
|
||||
static __gmp_inline mp_limb
|
||||
#if __STDC__
|
||||
#if defined (__STDC__)
|
||||
__mpn_add_1 (register mp_ptr res_ptr,
|
||||
register mp_srcptr s1_ptr,
|
||||
register mp_size_t s1_size,
|
||||
@ -377,7 +388,7 @@ __mpn_add_1 (res_ptr, s1_ptr, s1_size, s2_limb)
|
||||
}
|
||||
|
||||
static __gmp_inline mp_limb
|
||||
#if __STDC__
|
||||
#if defined (__STDC__)
|
||||
__mpn_add (register mp_ptr res_ptr,
|
||||
register mp_srcptr s1_ptr,
|
||||
register mp_size_t s1_size,
|
||||
@ -406,7 +417,7 @@ __mpn_add (res_ptr, s1_ptr, s1_size, s2_ptr, s2_size)
|
||||
}
|
||||
|
||||
static __gmp_inline mp_limb
|
||||
#if __STDC__
|
||||
#if defined (__STDC__)
|
||||
__mpn_sub_1 (register mp_ptr res_ptr,
|
||||
register mp_srcptr s1_ptr,
|
||||
register mp_size_t s1_size,
|
||||
@ -448,7 +459,7 @@ __mpn_sub_1 (res_ptr, s1_ptr, s1_size, s2_limb)
|
||||
}
|
||||
|
||||
static __gmp_inline mp_limb
|
||||
#if __STDC__
|
||||
#if defined (__STDC__)
|
||||
__mpn_sub (register mp_ptr res_ptr,
|
||||
register mp_srcptr s1_ptr,
|
||||
register mp_size_t s1_size,
|
||||
@ -477,7 +488,7 @@ __mpn_sub (res_ptr, s1_ptr, s1_size, s2_ptr, s2_size)
|
||||
}
|
||||
|
||||
static __gmp_inline mp_size_t
|
||||
#if __STDC__
|
||||
#if defined (__STDC__)
|
||||
__mpn_normal_size (mp_srcptr ptr, mp_size_t size)
|
||||
#else
|
||||
__mpn_normal_size (ptr, size)
|
||||
@ -512,7 +523,6 @@ __mpn_normal_size (ptr, size)
|
||||
/* Useful synonyms, but not quite compatible with GMP 1. */
|
||||
#define mpz_div mpz_fdiv_q
|
||||
#define mpz_divmod mpz_fdiv_qr
|
||||
#define mpz_mod mpz_fdiv_r
|
||||
#define mpz_div_ui mpz_fdiv_q_ui
|
||||
#define mpz_divmod_ui mpz_fdiv_qr_ui
|
||||
#define mpz_mod_ui mpz_fdiv_r_ui
|
||||
|
@ -97,7 +97,7 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||
#define __AND_CLOBBER_CC , "cc"
|
||||
#endif /* __GNUC__ < 2 */
|
||||
|
||||
#if (defined (__a29k__) || defined (___AM29K__)) && W_TYPE_SIZE == 32
|
||||
#if (defined (__a29k__) || defined (_AM29K)) && W_TYPE_SIZE == 32
|
||||
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
||||
__asm__ ("add %1,%4,%5
|
||||
addc %0,%2,%3" \
|
||||
@ -152,6 +152,7 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||
(pl) = __m0 * __m1; \
|
||||
} while (0)
|
||||
#define UMUL_TIME 46
|
||||
#ifndef LONGLONG_STANDALONE
|
||||
#define udiv_qrnnd(q, r, n1, n0, d) \
|
||||
do { UDItype __r; \
|
||||
(q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \
|
||||
@ -159,12 +160,13 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||
} while (0)
|
||||
extern UDItype __udiv_qrnnd ();
|
||||
#define UDIV_TIME 220
|
||||
#endif
|
||||
#endif /* LONGLONG_STANDALONE */
|
||||
#endif /* __alpha__ */
|
||||
|
||||
#if defined (__arm__) && W_TYPE_SIZE == 32
|
||||
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
||||
__asm__ ("adds %1,%4,%5
|
||||
adc %0,%2,%3" \
|
||||
__asm__ ("adds %1, %4, %5
|
||||
adc %0, %2, %3" \
|
||||
: "=r" ((USItype)(sh)), \
|
||||
"=&r" ((USItype)(sl)) \
|
||||
: "%r" ((USItype)(ah)), \
|
||||
@ -172,8 +174,8 @@ extern UDItype __udiv_qrnnd ();
|
||||
"%r" ((USItype)(al)), \
|
||||
"rI" ((USItype)(bl)))
|
||||
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
||||
__asm__ ("subs %1,%4,%5
|
||||
sbc %0,%2,%3" \
|
||||
__asm__ ("subs %1, %4, %5
|
||||
sbc %0, %2, %3" \
|
||||
: "=r" ((USItype)(sh)), \
|
||||
"=&r" ((USItype)(sl)) \
|
||||
: "r" ((USItype)(ah)), \
|
||||
@ -181,19 +183,19 @@ extern UDItype __udiv_qrnnd ();
|
||||
"r" ((USItype)(al)), \
|
||||
"rI" ((USItype)(bl)))
|
||||
#define umul_ppmm(xh, xl, a, b) \
|
||||
__asm__ ("; Inlined umul_ppmm
|
||||
mov r0,%2 lsr 16
|
||||
mov r2,%3 lsr 16
|
||||
bic r1,%2,r0 lsl 16
|
||||
bic r2,%3,r2 lsl 16
|
||||
mul %1,r1,r2
|
||||
mul r2,r0,r2
|
||||
mul r1,%0,r1
|
||||
mul %0,r0,%0
|
||||
adds r1,r2,r1
|
||||
addcs %0,%0,0x10000
|
||||
adds %1,%1,r1 lsl 16
|
||||
adc %0,%0,r1 lsr 16" \
|
||||
__asm__ ("%@ Inlined umul_ppmm
|
||||
mov %|r0, %2, lsr #16
|
||||
mov %|r2, %3, lsr #16
|
||||
bic %|r1, %2, %|r0, lsl #16
|
||||
bic %|r2, %3, %|r2, lsl #16
|
||||
mul %1, %|r1, %|r2
|
||||
mul %|r2, %|r0, %|r2
|
||||
mul %|r1, %0, %|r1
|
||||
mul %0, %|r0, %0
|
||||
adds %|r1, %|r2, %|r1
|
||||
addcs %0, %0, #65536
|
||||
adds %1, %1, %|r1, lsl #16
|
||||
adc %0, %0, %|r1, lsr #16" \
|
||||
: "=&r" ((USItype)(xh)), \
|
||||
"=r" ((USItype)(xl)) \
|
||||
: "r" ((USItype)(a)), \
|
||||
@ -296,9 +298,9 @@ extern UDItype __udiv_qrnnd ();
|
||||
struct {USItype __h, __l;} __i; \
|
||||
} __xx; \
|
||||
__asm__ ("xmpyu %1,%2,%0" \
|
||||
: "=x" (__xx.__ll) \
|
||||
: "x" ((USItype)(u)), \
|
||||
"x" ((USItype)(v))); \
|
||||
: "=fx" (__xx.__ll) \
|
||||
: "fx" ((USItype)(u)), \
|
||||
"fx" ((USItype)(v))); \
|
||||
(wh) = __xx.__i.__h; \
|
||||
(wl) = __xx.__i.__l; \
|
||||
} while (0)
|
||||
@ -308,12 +310,14 @@ extern UDItype __udiv_qrnnd ();
|
||||
#define UMUL_TIME 40
|
||||
#define UDIV_TIME 80
|
||||
#endif
|
||||
#ifndef LONGLONG_STANDALONE
|
||||
#define udiv_qrnnd(q, r, n1, n0, d) \
|
||||
do { USItype __r; \
|
||||
(q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \
|
||||
(r) = __r; \
|
||||
} while (0)
|
||||
extern USItype __udiv_qrnnd ();
|
||||
#endif /* LONGLONG_STANDALONE */
|
||||
#define count_leading_zeros(count, x) \
|
||||
do { \
|
||||
USItype __tmp; \
|
||||
@ -419,8 +423,12 @@ extern USItype __udiv_qrnnd ();
|
||||
} while (0)
|
||||
#define count_trailing_zeros(count, x) \
|
||||
__asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x)))
|
||||
#ifndef UMUL_TIME
|
||||
#define UMUL_TIME 40
|
||||
#endif
|
||||
#ifndef UDIV_TIME
|
||||
#define UDIV_TIME 40
|
||||
#endif
|
||||
#endif /* 80x86 */
|
||||
|
||||
#if defined (__i960__) && W_TYPE_SIZE == 32
|
||||
@ -442,7 +450,7 @@ extern USItype __udiv_qrnnd ();
|
||||
__w; })
|
||||
#endif /* __i960__ */
|
||||
|
||||
#if defined (__mc68000__) && W_TYPE_SIZE == 32
|
||||
#if (defined (__mc68000__) || defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32
|
||||
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
||||
__asm__ ("add%.l %5,%1
|
||||
addx%.l %3,%0" \
|
||||
@ -489,38 +497,34 @@ extern USItype __udiv_qrnnd ();
|
||||
: "=d" ((USItype)(count)) \
|
||||
: "od" ((USItype)(x)), "n" (0))
|
||||
#else /* not mc68020 */
|
||||
#define umul_ppmm(xh, xl, a, b) \
|
||||
__asm__ ("| Inlined umul_ppmm
|
||||
move%.l %2,%/d0
|
||||
move%.l %3,%/d1
|
||||
move%.l %/d0,%/d2
|
||||
swap %/d0
|
||||
move%.l %/d1,%/d3
|
||||
swap %/d1
|
||||
move%.w %/d2,%/d4
|
||||
mulu %/d3,%/d4
|
||||
mulu %/d1,%/d2
|
||||
mulu %/d0,%/d3
|
||||
mulu %/d0,%/d1
|
||||
move%.l %/d4,%/d0
|
||||
eor%.w %/d0,%/d0
|
||||
swap %/d0
|
||||
add%.l %/d0,%/d2
|
||||
add%.l %/d3,%/d2
|
||||
#define umul_ppmmxx(xh, xl, a, b) \
|
||||
do { USItype __umul_tmp1, __umul_tmp2; \
|
||||
__asm__ ("| Inlined umul_ppmm
|
||||
move%.l %5,%3
|
||||
move%.l %2,%0
|
||||
move%.w %3,%1
|
||||
swap %3
|
||||
swap %0
|
||||
mulu %2,%1
|
||||
mulu %3,%0
|
||||
mulu %2,%3
|
||||
swap %2
|
||||
mulu %5,%2
|
||||
add%.l %3,%2
|
||||
jcc 1f
|
||||
add%.l #65536,%/d1
|
||||
1: swap %/d2
|
||||
moveq #0,%/d0
|
||||
move%.w %/d2,%/d0
|
||||
move%.w %/d4,%/d2
|
||||
move%.l %/d2,%1
|
||||
add%.l %/d1,%/d0
|
||||
move%.l %/d0,%0" \
|
||||
: "=g" ((USItype)(xh)), \
|
||||
"=g" ((USItype)(xl)) \
|
||||
: "g" ((USItype)(a)), \
|
||||
"g" ((USItype)(b)) \
|
||||
: "d0", "d1", "d2", "d3", "d4")
|
||||
add%.l %#0x10000,%0
|
||||
1: move%.l %2,%3
|
||||
clr%.w %2
|
||||
swap %2
|
||||
swap %3
|
||||
clr%.w %3
|
||||
add%.l %3,%1
|
||||
addx%.l %2,%0
|
||||
| End inlined umul_ppmm" \
|
||||
: "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \
|
||||
"=d" (__umul_tmp1), "=&d" (__umul_tmp2) \
|
||||
: "%2" ((USItype)(a)), "d" ((USItype)(b))); \
|
||||
} while (0)
|
||||
#define UMUL_TIME 100
|
||||
#define UDIV_TIME 400
|
||||
#endif /* not mc68020 */
|
||||
@ -553,7 +557,7 @@ extern USItype __udiv_qrnnd ();
|
||||
: "r" ((USItype)(x))); \
|
||||
(count) = __cbtmp ^ 31; \
|
||||
} while (0)
|
||||
#if defined (__mc88110__)
|
||||
#if defined (__m88110__)
|
||||
#define umul_ppmm(wh, wl, u, v) \
|
||||
do { \
|
||||
union {UDItype __ll; \
|
||||
@ -582,10 +586,18 @@ extern USItype __udiv_qrnnd ();
|
||||
#else
|
||||
#define UMUL_TIME 17
|
||||
#define UDIV_TIME 150
|
||||
#endif /* __mc88110__ */
|
||||
#endif /* __m88110__ */
|
||||
#endif /* __m88000__ */
|
||||
|
||||
#if defined (__mips__) && W_TYPE_SIZE == 32
|
||||
#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
|
||||
#define umul_ppmm(w1, w0, u, v) \
|
||||
__asm__ ("multu %2,%3" \
|
||||
: "=l" ((USItype)(w0)), \
|
||||
"=h" ((USItype)(w1)) \
|
||||
: "d" ((USItype)(u)), \
|
||||
"d" ((USItype)(v)))
|
||||
#else
|
||||
#define umul_ppmm(w1, w0, u, v) \
|
||||
__asm__ ("multu %2,%3
|
||||
mflo %0
|
||||
@ -594,11 +606,20 @@ extern USItype __udiv_qrnnd ();
|
||||
"=d" ((USItype)(w1)) \
|
||||
: "d" ((USItype)(u)), \
|
||||
"d" ((USItype)(v)))
|
||||
#endif
|
||||
#define UMUL_TIME 10
|
||||
#define UDIV_TIME 100
|
||||
#endif /* __mips__ */
|
||||
|
||||
#if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64
|
||||
#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
|
||||
#define umul_ppmm(w1, w0, u, v) \
|
||||
__asm__ ("dmultu %2,%3" \
|
||||
: "=l" ((UDItype)(w0)), \
|
||||
"=h" ((UDItype)(w1)) \
|
||||
: "d" ((UDItype)(u)), \
|
||||
"d" ((UDItype)(v)))
|
||||
#else
|
||||
#define umul_ppmm(w1, w0, u, v) \
|
||||
__asm__ ("dmultu %2,%3
|
||||
mflo %0
|
||||
@ -607,8 +628,9 @@ extern USItype __udiv_qrnnd ();
|
||||
"=d" ((UDItype)(w1)) \
|
||||
: "d" ((UDItype)(u)), \
|
||||
"d" ((UDItype)(v)))
|
||||
#define UMUL_TIME 10
|
||||
#define UDIV_TIME 100
|
||||
#endif
|
||||
#define UMUL_TIME 20
|
||||
#define UDIV_TIME 140
|
||||
#endif /* __mips__ */
|
||||
|
||||
#if defined (__ns32000__) && W_TYPE_SIZE == 32
|
||||
@ -647,7 +669,7 @@ extern USItype __udiv_qrnnd ();
|
||||
} while (0)
|
||||
#endif /* __ns32000__ */
|
||||
|
||||
#if (defined (__powerpc__) || defined (___IBMR2__)) && W_TYPE_SIZE == 32
|
||||
#if (defined (_ARCH_PPC) || defined (_IBMR2)) && W_TYPE_SIZE == 32
|
||||
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
||||
do { \
|
||||
if (__builtin_constant_p (bh) && (bh) == 0) \
|
||||
@ -676,14 +698,14 @@ extern USItype __udiv_qrnnd ();
|
||||
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
||||
do { \
|
||||
if (__builtin_constant_p (ah) && (ah) == 0) \
|
||||
__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
|
||||
__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
|
||||
: "=r" ((USItype)(sh)), \
|
||||
"=&r" ((USItype)(sl)) \
|
||||
: "r" ((USItype)(bh)), \
|
||||
"rI" ((USItype)(al)), \
|
||||
"r" ((USItype)(bl))); \
|
||||
else if (__builtin_constant_p (ah) && (ah) ==~(USItype) 0) \
|
||||
__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
|
||||
__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
|
||||
: "=r" ((USItype)(sh)), \
|
||||
"=&r" ((USItype)(sl)) \
|
||||
: "r" ((USItype)(bh)), \
|
||||
@ -716,7 +738,7 @@ extern USItype __udiv_qrnnd ();
|
||||
__asm__ ("{cntlz|cntlzw} %0,%1" \
|
||||
: "=r" ((USItype)(count)) \
|
||||
: "r" ((USItype)(x)))
|
||||
#if defined (__powerpc__)
|
||||
#if defined (_ARCH_PPC)
|
||||
#define umul_ppmm(ph, pl, m0, m1) \
|
||||
do { \
|
||||
USItype __m0 = (m0), __m1 = (m1); \
|
||||
@ -785,16 +807,15 @@ extern USItype __udiv_qrnnd ();
|
||||
"g" ((USItype)(bh)), \
|
||||
"1" ((USItype)(al)), \
|
||||
"g" ((USItype)(bl)))
|
||||
/* This insn doesn't work on ancient pyramids. */
|
||||
/* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP. */
|
||||
#define umul_ppmm(w1, w0, u, v) \
|
||||
({union {UDItype __ll; \
|
||||
struct {USItype __h, __l;} __i; \
|
||||
} __xx; \
|
||||
__xx.__i.__l = u; \
|
||||
__asm__ ("uemul %3,%0" \
|
||||
: "=r" (__xx.__i.__h), \
|
||||
"=r" (__xx.__i.__l) \
|
||||
: "1" (__xx.__i.__l), \
|
||||
__asm__ ("movw %1,%R0
|
||||
uemul %2,%0" \
|
||||
: "=&r" (__xx.__ll) \
|
||||
: "g" ((USItype) (u)), \
|
||||
"g" ((USItype)(v))); \
|
||||
(w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
|
||||
#endif /* __pyr__ */
|
||||
@ -868,6 +889,20 @@ extern USItype __udiv_qrnnd ();
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
#if defined (__sh2__) && W_TYPE_SIZE == 32
|
||||
#define umul_ppmm(w1, w0, u, v) \
|
||||
__asm__ ( \
|
||||
"dmulu.l %2,%3
|
||||
sts macl,%1
|
||||
sts mach,%0" \
|
||||
: "=r" ((USItype)(w1)), \
|
||||
"=r" ((USItype)(w0)) \
|
||||
: "r" ((USItype)(u)), \
|
||||
"r" ((USItype)(v)) \
|
||||
: "macl", "mach")
|
||||
#define UMUL_TIME 5
|
||||
#endif
|
||||
|
||||
#if defined (__sparc__) && W_TYPE_SIZE == 32
|
||||
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
||||
__asm__ ("addcc %r4,%5,%1
|
||||
@ -901,17 +936,21 @@ extern USItype __udiv_qrnnd ();
|
||||
: "r" ((USItype)(u)), \
|
||||
"r" ((USItype)(v)))
|
||||
#define UMUL_TIME 5
|
||||
/* We might want to leave this undefined for `SuperSPARC (tm)' since
|
||||
its implementation is crippled and often traps. */
|
||||
#ifndef SUPERSPARC /* SuperSPARC's udiv only handles 53 bit dividends */
|
||||
#define udiv_qrnnd(q, r, n1, n0, d) \
|
||||
__asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
|
||||
: "=&r" ((USItype)(q)), \
|
||||
"=&r" ((USItype)(r)) \
|
||||
: "r" ((USItype)(n1)), \
|
||||
"r" ((USItype)(n0)), \
|
||||
"r" ((USItype)(d)))
|
||||
do { \
|
||||
USItype __q; \
|
||||
__asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
|
||||
: "=r" ((USItype)(__q)) \
|
||||
: "r" ((USItype)(n1)), \
|
||||
"r" ((USItype)(n0)), \
|
||||
"r" ((USItype)(d))); \
|
||||
(r) = (n0) - __q * (d); \
|
||||
(q) = __q; \
|
||||
} while (0)
|
||||
#define UDIV_TIME 25
|
||||
#else
|
||||
#endif /* SUPERSPARC */
|
||||
#else /* ! __sparc_v8__ */
|
||||
#if defined (__sparclite__)
|
||||
/* This has hardware multiply but not divide. It also has two additional
|
||||
instructions scan (ffs from high bit) and divscc. */
|
||||
@ -973,9 +1012,10 @@ extern USItype __udiv_qrnnd ();
|
||||
__asm__ ("scan %1,0,%0" \
|
||||
: "=r" ((USItype)(x)) \
|
||||
: "r" ((USItype)(count)))
|
||||
#else
|
||||
/* SPARC without integer multiplication and divide instructions.
|
||||
(i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
|
||||
#endif /* __sparclite__ */
|
||||
#endif /* __sparc_v8__ */
|
||||
/* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */
|
||||
#ifndef umul_ppmm
|
||||
#define umul_ppmm(w1, w0, u, v) \
|
||||
__asm__ ("! Inlined umul_ppmm
|
||||
wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr
|
||||
@ -1023,6 +1063,9 @@ extern USItype __udiv_qrnnd ();
|
||||
"r" ((USItype)(v)) \
|
||||
: "%g1", "%g2" __AND_CLOBBER_CC)
|
||||
#define UMUL_TIME 39 /* 39 instructions */
|
||||
#endif
|
||||
#ifndef udiv_qrnnd
|
||||
#ifndef LONGLONG_STANDALONE
|
||||
#define udiv_qrnnd(q, r, n1, n0, d) \
|
||||
do { USItype __r; \
|
||||
(q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \
|
||||
@ -1030,8 +1073,8 @@ extern USItype __udiv_qrnnd ();
|
||||
} while (0)
|
||||
extern USItype __udiv_qrnnd ();
|
||||
#define UDIV_TIME 140
|
||||
#endif /* __sparclite__ */
|
||||
#endif /* __sparc_v8__ */
|
||||
#endif /* LONGLONG_STANDALONE */
|
||||
#endif /* udiv_qrnnd */
|
||||
#endif /* __sparc__ */
|
||||
|
||||
#if defined (__vax__) && W_TYPE_SIZE == 32
|
||||
@ -1075,7 +1118,7 @@ extern USItype __udiv_qrnnd ();
|
||||
__xx.__i.__h = n1; __xx.__i.__l = n0; \
|
||||
__asm__ ("ediv %3,%2,%0,%1" \
|
||||
: "=g" (q), "=g" (r) \
|
||||
: "g" (__n1n0.ll), "g" (d)); \
|
||||
: "g" (__xx.ll), "g" (d)); \
|
||||
} while (0)
|
||||
#endif /* __vax__ */
|
||||
|
||||
@ -1173,11 +1216,12 @@ extern USItype __udiv_qrnnd ();
|
||||
do { \
|
||||
UWtype __x0, __x1, __x2, __x3; \
|
||||
UHWtype __ul, __vl, __uh, __vh; \
|
||||
UWtype __u = (u), __v = (v); \
|
||||
\
|
||||
__ul = __ll_lowpart (u); \
|
||||
__uh = __ll_highpart (u); \
|
||||
__vl = __ll_lowpart (v); \
|
||||
__vh = __ll_highpart (v); \
|
||||
__ul = __ll_lowpart (__u); \
|
||||
__uh = __ll_highpart (__u); \
|
||||
__vl = __ll_lowpart (__v); \
|
||||
__vh = __ll_highpart (__v); \
|
||||
\
|
||||
__x0 = (UWtype) __ul * __vl; \
|
||||
__x1 = (UWtype) __ul * __vh; \
|
||||
@ -1194,6 +1238,17 @@ extern USItype __udiv_qrnnd ();
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
#if !defined (umul_ppmm)
|
||||
#define smul_ppmm(w1, w0, u, v) \
|
||||
do { \
|
||||
UWtype __w1; \
|
||||
UWtype __m0 = (u), __m1 = (v); \
|
||||
umul_ppmm (__w1, w0, __m0, __m1); \
|
||||
(w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1) \
|
||||
- (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0); \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
/* Define this unconditionally, so it can be used for debugging. */
|
||||
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
|
||||
do { \
|
||||
|
119
sysdeps/alpha/add_n.s
Normal file
119
sysdeps/alpha/add_n.s
Normal file
@ -0,0 +1,119 @@
|
||||
# Alpha __mpn_add_n -- Add two limb vectors of the same length > 0 and
|
||||
# store sum in a third limb vector.
|
||||
|
||||
# Copyright (C) 1995 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Library General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr $16
|
||||
# s1_ptr $17
|
||||
# s2_ptr $18
|
||||
# size $19
|
||||
|
||||
.set noreorder
|
||||
.set noat
|
||||
.text
|
||||
.align 3
|
||||
.globl __mpn_add_n
|
||||
.ent __mpn_add_n
|
||||
__mpn_add_n:
|
||||
.frame $30,0,$26,0
|
||||
|
||||
ldq $3,0($17)
|
||||
ldq $4,0($18)
|
||||
|
||||
subq $19,1,$19
|
||||
and $19,4-1,$2 # number of limbs in first loop
|
||||
bis $31,$31,$0
|
||||
beq $2,.L0 # if multiple of 4 limbs, skip first loop
|
||||
|
||||
subq $19,$2,$19
|
||||
|
||||
.Loop0: subq $2,1,$2
|
||||
ldq $5,8($17)
|
||||
addq $4,$0,$4
|
||||
ldq $6,8($18)
|
||||
cmpult $4,$0,$1
|
||||
addq $3,$4,$4
|
||||
cmpult $4,$3,$0
|
||||
stq $4,0($16)
|
||||
or $0,$1,$0
|
||||
|
||||
addq $17,8,$17
|
||||
addq $18,8,$18
|
||||
bis $5,$5,$3
|
||||
bis $6,$6,$4
|
||||
addq $16,8,$16
|
||||
bne $2,.Loop0
|
||||
|
||||
.L0: beq $19,.Lend
|
||||
|
||||
.align 3
|
||||
.Loop: subq $19,4,$19
|
||||
|
||||
ldq $5,8($17)
|
||||
addq $4,$0,$4
|
||||
ldq $6,8($18)
|
||||
cmpult $4,$0,$1
|
||||
addq $3,$4,$4
|
||||
cmpult $4,$3,$0
|
||||
stq $4,0($16)
|
||||
or $0,$1,$0
|
||||
|
||||
ldq $3,16($17)
|
||||
addq $6,$0,$6
|
||||
ldq $4,16($18)
|
||||
cmpult $6,$0,$1
|
||||
addq $5,$6,$6
|
||||
cmpult $6,$5,$0
|
||||
stq $6,8($16)
|
||||
or $0,$1,$0
|
||||
|
||||
ldq $5,24($17)
|
||||
addq $4,$0,$4
|
||||
ldq $6,24($18)
|
||||
cmpult $4,$0,$1
|
||||
addq $3,$4,$4
|
||||
cmpult $4,$3,$0
|
||||
stq $4,16($16)
|
||||
or $0,$1,$0
|
||||
|
||||
ldq $3,32($17)
|
||||
addq $6,$0,$6
|
||||
ldq $4,32($18)
|
||||
cmpult $6,$0,$1
|
||||
addq $5,$6,$6
|
||||
cmpult $6,$5,$0
|
||||
stq $6,24($16)
|
||||
or $0,$1,$0
|
||||
|
||||
addq $17,32,$17
|
||||
addq $18,32,$18
|
||||
addq $16,32,$16
|
||||
bne $19,.Loop
|
||||
|
||||
.Lend: addq $4,$0,$4
|
||||
cmpult $4,$0,$1
|
||||
addq $3,$4,$4
|
||||
cmpult $4,$3,$0
|
||||
stq $4,0($16)
|
||||
or $0,$1,$0
|
||||
ret $31,($26),1
|
||||
|
||||
.end __mpn_add_n
|
100
sysdeps/alpha/addmul_1.s
Normal file
100
sysdeps/alpha/addmul_1.s
Normal file
@ -0,0 +1,100 @@
|
||||
# Alpha 21064 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
|
||||
# the result to a second limb vector.
|
||||
|
||||
# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Library General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr r16
|
||||
# s1_ptr r17
|
||||
# size r18
|
||||
# s2_limb r19
|
||||
|
||||
# This code runs at 42 cycles/limb on the 21064.
|
||||
|
||||
# To improve performance for long multiplications, we would use
|
||||
# 'fetch' for S1 and 'fetch_m' for RES. It's not obvious how to use
|
||||
# these instructions without slowing down the general code: 1. We can
|
||||
# only have two prefetches in operation at any time in the Alpha
|
||||
# architecture. 2. There will seldom be any special alignment
|
||||
# between RES_PTR and S1_PTR. Maybe we can simply divide the current
|
||||
# loop into an inner and outer loop, having the inner loop handle
|
||||
# exactly one prefetch block?
|
||||
|
||||
.set noreorder
|
||||
.set noat
|
||||
.text
|
||||
.align 3
|
||||
.globl __mpn_addmul_1
|
||||
.ent __mpn_addmul_1 2
|
||||
__mpn_addmul_1:
|
||||
.frame $30,0,$26
|
||||
|
||||
ldq $2,0($17) # $2 = s1_limb
|
||||
addq $17,8,$17 # s1_ptr++
|
||||
subq $18,1,$18 # size--
|
||||
mulq $2,$19,$3 # $3 = prod_low
|
||||
ldq $5,0($16) # $5 = *res_ptr
|
||||
umulh $2,$19,$0 # $0 = prod_high
|
||||
beq $18,Lend1 # jump if size was == 1
|
||||
ldq $2,0($17) # $2 = s1_limb
|
||||
addq $17,8,$17 # s1_ptr++
|
||||
subq $18,1,$18 # size--
|
||||
addq $5,$3,$3
|
||||
cmpult $3,$5,$4
|
||||
stq $3,0($16)
|
||||
addq $16,8,$16 # res_ptr++
|
||||
beq $18,Lend2 # jump if size was == 2
|
||||
|
||||
.align 3
|
||||
Loop: mulq $2,$19,$3 # $3 = prod_low
|
||||
ldq $5,0($16) # $5 = *res_ptr
|
||||
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
|
||||
subq $18,1,$18 # size--
|
||||
umulh $2,$19,$4 # $4 = cy_limb
|
||||
ldq $2,0($17) # $2 = s1_limb
|
||||
addq $17,8,$17 # s1_ptr++
|
||||
addq $3,$0,$3 # $3 = cy_limb + prod_low
|
||||
cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
|
||||
addq $5,$3,$3
|
||||
cmpult $3,$5,$5
|
||||
stq $3,0($16)
|
||||
addq $16,8,$16 # res_ptr++
|
||||
addq $5,$0,$0 # combine carries
|
||||
bne $18,Loop
|
||||
|
||||
Lend2: mulq $2,$19,$3 # $3 = prod_low
|
||||
ldq $5,0($16) # $5 = *res_ptr
|
||||
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
|
||||
umulh $2,$19,$4 # $4 = cy_limb
|
||||
addq $3,$0,$3 # $3 = cy_limb + prod_low
|
||||
cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
|
||||
addq $5,$3,$3
|
||||
cmpult $3,$5,$5
|
||||
stq $3,0($16)
|
||||
addq $5,$0,$0 # combine carries
|
||||
addq $4,$0,$0 # cy_limb = prod_high + cy
|
||||
ret $31,($26),1
|
||||
Lend1: addq $5,$3,$3
|
||||
cmpult $3,$5,$5
|
||||
stq $3,0($16)
|
||||
addq $0,$5,$0
|
||||
ret $31,($26),1
|
||||
|
||||
.end __mpn_addmul_1
|
118
sysdeps/alpha/alphaev5/add_n.s
Normal file
118
sysdeps/alpha/alphaev5/add_n.s
Normal file
@ -0,0 +1,118 @@
|
||||
# Alpha __mpn_add_n -- Add two limb vectors of the same length > 0 and
|
||||
# store sum in a third limb vector.
|
||||
|
||||
# Copyright (C) 1995 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Library General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr $16
|
||||
# s1_ptr $17
|
||||
# s2_ptr $18
|
||||
# size $19
|
||||
|
||||
.set noreorder
|
||||
.set noat
|
||||
.text
|
||||
.align 3
|
||||
.globl __mpn_add_n
|
||||
.ent __mpn_add_n
|
||||
__mpn_add_n:
|
||||
.frame $30,0,$26,0
|
||||
|
||||
ldq $3,0($17)
|
||||
ldq $4,0($18)
|
||||
|
||||
subq $19,1,$19
|
||||
and $19,4-1,$2 # number of limbs in first loop
|
||||
bis $31,$31,$0
|
||||
beq $2,.L0 # if multiple of 4 limbs, skip first loop
|
||||
|
||||
subq $19,$2,$19
|
||||
|
||||
.Loop0: subq $2,1,$2
|
||||
ldq $5,8($17)
|
||||
addq $4,$0,$4
|
||||
ldq $6,8($18)
|
||||
cmpult $4,$0,$1
|
||||
addq $3,$4,$4
|
||||
cmpult $4,$3,$0
|
||||
stq $4,0($16)
|
||||
or $0,$1,$0
|
||||
|
||||
addq $17,8,$17
|
||||
addq $18,8,$18
|
||||
bis $5,$5,$3
|
||||
bis $6,$6,$4
|
||||
addq $16,8,$16
|
||||
bne $2,.Loop0
|
||||
|
||||
.L0: beq $19,.Lend
|
||||
|
||||
.align 4
|
||||
.Loop: subq $19,4,$19
|
||||
unop
|
||||
|
||||
ldq $6,8($18)
|
||||
addq $4,$0,$0
|
||||
ldq $5,8($17)
|
||||
cmpult $0,$4,$1
|
||||
ldq $4,16($18)
|
||||
addq $3,$0,$20
|
||||
cmpult $20,$3,$0
|
||||
ldq $3,16($17)
|
||||
or $0,$1,$0
|
||||
addq $6,$0,$0
|
||||
cmpult $0,$6,$1
|
||||
ldq $6,24($18)
|
||||
addq $5,$0,$21
|
||||
cmpult $21,$5,$0
|
||||
ldq $5,24($17)
|
||||
or $0,$1,$0
|
||||
addq $4,$0,$0
|
||||
cmpult $0,$4,$1
|
||||
ldq $4,32($18)
|
||||
addq $3,$0,$22
|
||||
cmpult $22,$3,$0
|
||||
ldq $3,32($17)
|
||||
or $0,$1,$0
|
||||
addq $6,$0,$0
|
||||
cmpult $0,$6,$1
|
||||
addq $5,$0,$23
|
||||
cmpult $23,$5,$0
|
||||
or $0,$1,$0
|
||||
|
||||
stq $20,0($16)
|
||||
stq $21,8($16)
|
||||
stq $22,16($16)
|
||||
stq $23,24($16)
|
||||
|
||||
addq $17,32,$17
|
||||
addq $18,32,$18
|
||||
addq $16,32,$16
|
||||
bne $19,.Loop
|
||||
|
||||
.Lend: addq $4,$0,$4
|
||||
cmpult $4,$0,$1
|
||||
addq $3,$4,$4
|
||||
cmpult $4,$3,$0
|
||||
stq $4,0($16)
|
||||
or $0,$1,$0
|
||||
ret $31,($26),1
|
||||
|
||||
.end __mpn_add_n
|
175
sysdeps/alpha/alphaev5/lshift.s
Normal file
175
sysdeps/alpha/alphaev5/lshift.s
Normal file
@ -0,0 +1,175 @@
|
||||
# Alpha EV5 __mpn_lshift --
|
||||
|
||||
# Copyright (C) 1994, 1995 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Library General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr r16
|
||||
# s1_ptr r17
|
||||
# size r18
|
||||
# cnt r19
|
||||
|
||||
# This code runs at 4.25 cycles/limb on the EV5.
|
||||
|
||||
.set noreorder
|
||||
.set noat
|
||||
.text
|
||||
.align 3
|
||||
# __mpn_lshift (EV5): shifts the $18-limb vector at $17 (s1_ptr) left by $19
# (cnt) bits and stores the result through $16 (res_ptr). The vector is
# processed from its most significant limb downwards. $0 receives the bits
# shifted out of the most significant limb.
# Register use: $20 = -cnt (used as the right-shift count), $24 = pending
# "limb << cnt" part of the next result limb, $1-$4 = loaded source limbs.
.globl __mpn_lshift
|
||||
.ent __mpn_lshift
|
||||
__mpn_lshift:
|
||||
.frame $30,0,$26,0
|
||||
|
||||
s8addq $18,$17,$17 # make r17 point at end of s1
|
||||
ldq $4,-8($17) # load first limb
|
||||
# $20 = 0 - cnt. Alpha shifts use only the low 6 bits of the count, so
# shifting right by $20 acts as a shift by 64-cnt.
subq $31,$19,$20
|
||||
s8addq $18,$16,$16 # make r16 point at end of RES
|
||||
subq $18,1,$18
|
||||
and $18,4-1,$28 # number of limbs in first loop
|
||||
srl $4,$20,$0 # compute function result
|
||||
|
||||
beq $28,L0
|
||||
subq $18,$28,$18
|
||||
|
||||
.align 3
|
||||
# First loop: one result limb per iteration, $28 iterations. Each result limb
# is (current limb << cnt) | (next lower limb >> (64-cnt)).
Loop0: ldq $3,-16($17)
|
||||
subq $16,8,$16
|
||||
sll $4,$19,$5
|
||||
subq $17,8,$17
|
||||
subq $28,1,$28
|
||||
srl $3,$20,$6
|
||||
or $3,$3,$4
|
||||
or $5,$6,$8
|
||||
stq $8,0($16)
|
||||
bne $28,Loop0
|
||||
|
||||
L0: sll $4,$19,$24
|
||||
beq $18,Lend
|
||||
# warm up phase 1
|
||||
ldq $1,-16($17)
|
||||
subq $18,4,$18
|
||||
ldq $2,-24($17)
|
||||
ldq $3,-32($17)
|
||||
ldq $4,-40($17)
|
||||
# Only one group of four limbs left: finish with the phase-1 cool down.
beq $18,Lcool1
|
||||
# warm up phase 2
|
||||
srl $1,$20,$7
|
||||
sll $1,$19,$21
|
||||
srl $2,$20,$8
|
||||
ldq $1,-48($17)
|
||||
sll $2,$19,$22
|
||||
ldq $2,-56($17)
|
||||
srl $3,$20,$5
|
||||
or $7,$24,$7
|
||||
sll $3,$19,$23
|
||||
or $8,$21,$8
|
||||
srl $4,$20,$6
|
||||
ldq $3,-64($17)
|
||||
sll $4,$19,$24
|
||||
ldq $4,-72($17)
|
||||
subq $18,4,$18
|
||||
# Two groups are in flight ($7/$8 ready to store, $1-$4 reloaded): if nothing
# else remains, drain both with the phase-2 cool down.
beq $18,Lcool2
|
||||
.align 4
|
||||
# main loop
|
||||
Loop: stq $7,-8($16)
|
||||
or $5,$22,$5
|
||||
stq $8,-16($16)
|
||||
or $6,$23,$6
|
||||
|
||||
srl $1,$20,$7
|
||||
subq $18,4,$18
|
||||
sll $1,$19,$21
|
||||
unop # ldq $31,-96($17)
|
||||
|
||||
srl $2,$20,$8
|
||||
ldq $1,-80($17)
|
||||
sll $2,$19,$22
|
||||
ldq $2,-88($17)
|
||||
|
||||
stq $5,-24($16)
|
||||
or $7,$24,$7
|
||||
stq $6,-32($16)
|
||||
or $8,$21,$8
|
||||
|
||||
srl $3,$20,$5
|
||||
unop # ldq $31,-96($17)
|
||||
sll $3,$19,$23
|
||||
subq $16,32,$16
|
||||
|
||||
srl $4,$20,$6
|
||||
ldq $3,-96($17)
|
||||
sll $4,$19,$24
|
||||
ldq $4,-104($17)
|
||||
|
||||
subq $17,32,$17
|
||||
bne $18,Loop
|
||||
unop
|
||||
unop
|
||||
# cool down phase 2/1
|
||||
# Entered from warm up phase 2 or by falling out of the main loop.
Lcool2: stq $7,-8($16)
|
||||
or $5,$22,$5
|
||||
stq $8,-16($16)
|
||||
or $6,$23,$6
|
||||
srl $1,$20,$7
|
||||
sll $1,$19,$21
|
||||
srl $2,$20,$8
|
||||
sll $2,$19,$22
|
||||
stq $5,-24($16)
|
||||
or $7,$24,$7
|
||||
stq $6,-32($16)
|
||||
or $8,$21,$8
|
||||
srl $3,$20,$5
|
||||
sll $3,$19,$23
|
||||
srl $4,$20,$6
|
||||
sll $4,$19,$24
|
||||
# cool down phase 2/2
|
||||
stq $7,-40($16)
|
||||
or $5,$22,$5
|
||||
stq $8,-48($16)
|
||||
or $6,$23,$6
|
||||
stq $5,-56($16)
|
||||
stq $6,-64($16)
|
||||
# cool down phase 2/3
|
||||
# The least significant result limb has no lower neighbour to OR in.
stq $24,-72($16)
|
||||
ret $31,($26),1
|
||||
|
||||
# cool down phase 1/1
|
||||
# Entered from warm up phase 1: $1-$4 are loaded but nothing is shifted yet.
Lcool1: srl $1,$20,$7
|
||||
sll $1,$19,$21
|
||||
srl $2,$20,$8
|
||||
sll $2,$19,$22
|
||||
srl $3,$20,$5
|
||||
or $7,$24,$7
|
||||
sll $3,$19,$23
|
||||
or $8,$21,$8
|
||||
srl $4,$20,$6
|
||||
sll $4,$19,$24
|
||||
# cool down phase 1/2
|
||||
stq $7,-8($16)
|
||||
or $5,$22,$5
|
||||
stq $8,-16($16)
|
||||
or $6,$23,$6
|
||||
stq $5,-24($16)
|
||||
stq $6,-32($16)
|
||||
stq $24,-40($16)
|
||||
ret $31,($26),1
|
||||
|
||||
# No limbs left for the unrolled code: store the pending shifted limb.
Lend: stq $24,-8($16)
|
||||
ret $31,($26),1
|
||||
.end __mpn_lshift
|
173
sysdeps/alpha/alphaev5/rshift.s
Normal file
173
sysdeps/alpha/alphaev5/rshift.s
Normal file
@ -0,0 +1,173 @@
|
||||
# Alpha EV5 __mpn_rshift --
|
||||
|
||||
# Copyright (C) 1994, 1995 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Library General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr r16
|
||||
# s1_ptr r17
|
||||
# size r18
|
||||
# cnt r19
|
||||
|
||||
# This code runs at 4.25 cycles/limb on the EV5.
|
||||
|
||||
.set noreorder
|
||||
.set noat
|
||||
.text
|
||||
.align 3
|
||||
# __mpn_rshift (EV5): shifts the $18-limb vector at $17 (s1_ptr) right by $19
# (cnt) bits and stores the result through $16 (res_ptr). The vector is
# processed from its least significant limb upwards. $0 receives the bits
# shifted out of the least significant limb.
# Register use: $20 = -cnt (used as the left-shift count), $24 = pending
# "limb >> cnt" part of the next result limb, $1-$4 = loaded source limbs.
.globl __mpn_rshift
|
||||
.ent __mpn_rshift
|
||||
__mpn_rshift:
|
||||
.frame $30,0,$26,0
|
||||
|
||||
ldq $4,0($17) # load first limb
|
||||
# $20 = 0 - cnt. Alpha shifts use only the low 6 bits of the count, so
# shifting left by $20 acts as a shift by 64-cnt.
subq $31,$19,$20
|
||||
subq $18,1,$18
|
||||
and $18,4-1,$28 # number of limbs in first loop
|
||||
sll $4,$20,$0 # compute function result
|
||||
|
||||
beq $28,L0
|
||||
subq $18,$28,$18
|
||||
|
||||
.align 3
|
||||
# First loop: one result limb per iteration, $28 iterations. Each result limb
# is (current limb >> cnt) | (next higher limb << (64-cnt)).
Loop0: ldq $3,8($17)
|
||||
addq $16,8,$16
|
||||
srl $4,$19,$5
|
||||
addq $17,8,$17
|
||||
subq $28,1,$28
|
||||
sll $3,$20,$6
|
||||
or $3,$3,$4
|
||||
or $5,$6,$8
|
||||
stq $8,-8($16)
|
||||
bne $28,Loop0
|
||||
|
||||
L0: srl $4,$19,$24
|
||||
beq $18,Lend
|
||||
# warm up phase 1
|
||||
ldq $1,8($17)
|
||||
subq $18,4,$18
|
||||
ldq $2,16($17)
|
||||
ldq $3,24($17)
|
||||
ldq $4,32($17)
|
||||
# Only one group of four limbs left: finish with the phase-1 cool down.
beq $18,Lcool1
|
||||
# warm up phase 2
|
||||
sll $1,$20,$7
|
||||
srl $1,$19,$21
|
||||
sll $2,$20,$8
|
||||
ldq $1,40($17)
|
||||
srl $2,$19,$22
|
||||
ldq $2,48($17)
|
||||
sll $3,$20,$5
|
||||
or $7,$24,$7
|
||||
srl $3,$19,$23
|
||||
or $8,$21,$8
|
||||
sll $4,$20,$6
|
||||
ldq $3,56($17)
|
||||
srl $4,$19,$24
|
||||
ldq $4,64($17)
|
||||
subq $18,4,$18
|
||||
# Two groups are in flight: if nothing else remains, drain both with the
# phase-2 cool down.
beq $18,Lcool2
|
||||
.align 4
|
||||
# main loop
|
||||
Loop: stq $7,0($16)
|
||||
or $5,$22,$5
|
||||
stq $8,8($16)
|
||||
or $6,$23,$6
|
||||
|
||||
sll $1,$20,$7
|
||||
subq $18,4,$18
|
||||
srl $1,$19,$21
|
||||
unop # ldq $31,-96($17)
|
||||
|
||||
sll $2,$20,$8
|
||||
ldq $1,72($17)
|
||||
srl $2,$19,$22
|
||||
ldq $2,80($17)
|
||||
|
||||
stq $5,16($16)
|
||||
or $7,$24,$7
|
||||
stq $6,24($16)
|
||||
or $8,$21,$8
|
||||
|
||||
sll $3,$20,$5
|
||||
unop # ldq $31,-96($17)
|
||||
srl $3,$19,$23
|
||||
addq $16,32,$16
|
||||
|
||||
sll $4,$20,$6
|
||||
ldq $3,88($17)
|
||||
srl $4,$19,$24
|
||||
ldq $4,96($17)
|
||||
|
||||
addq $17,32,$17
|
||||
bne $18,Loop
|
||||
unop
|
||||
unop
|
||||
# cool down phase 2/1
|
||||
# Entered from warm up phase 2 or by falling out of the main loop.
Lcool2: stq $7,0($16)
|
||||
or $5,$22,$5
|
||||
stq $8,8($16)
|
||||
or $6,$23,$6
|
||||
sll $1,$20,$7
|
||||
srl $1,$19,$21
|
||||
sll $2,$20,$8
|
||||
srl $2,$19,$22
|
||||
stq $5,16($16)
|
||||
or $7,$24,$7
|
||||
stq $6,24($16)
|
||||
or $8,$21,$8
|
||||
sll $3,$20,$5
|
||||
srl $3,$19,$23
|
||||
sll $4,$20,$6
|
||||
srl $4,$19,$24
|
||||
# cool down phase 2/2
|
||||
stq $7,32($16)
|
||||
or $5,$22,$5
|
||||
stq $8,40($16)
|
||||
or $6,$23,$6
|
||||
stq $5,48($16)
|
||||
stq $6,56($16)
|
||||
# cool down phase 2/3
|
||||
# The most significant result limb has no higher neighbour to OR in.
stq $24,64($16)
|
||||
ret $31,($26),1
|
||||
|
||||
# cool down phase 1/1
|
||||
# Entered from warm up phase 1: $1-$4 are loaded but nothing is shifted yet.
Lcool1: sll $1,$20,$7
|
||||
srl $1,$19,$21
|
||||
sll $2,$20,$8
|
||||
srl $2,$19,$22
|
||||
sll $3,$20,$5
|
||||
or $7,$24,$7
|
||||
srl $3,$19,$23
|
||||
or $8,$21,$8
|
||||
sll $4,$20,$6
|
||||
srl $4,$19,$24
|
||||
# cool down phase 1/2
|
||||
stq $7,0($16)
|
||||
or $5,$22,$5
|
||||
stq $8,8($16)
|
||||
or $6,$23,$6
|
||||
stq $5,16($16)
|
||||
stq $6,24($16)
|
||||
stq $24,32($16)
|
||||
ret $31,($26),1
|
||||
|
||||
# No limbs left for the unrolled code: store the pending shifted limb.
Lend: stq $24,0($16)
|
||||
ret $31,($26),1
|
||||
.end __mpn_rshift
|
108
sysdeps/alpha/lshift.s
Normal file
108
sysdeps/alpha/lshift.s
Normal file
@ -0,0 +1,108 @@
|
||||
# Alpha 21064 __mpn_lshift --
|
||||
|
||||
# Copyright (C) 1994, 1995 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Library General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr r16
|
||||
# s1_ptr r17
|
||||
# size r18
|
||||
# cnt r19
|
||||
|
||||
# This code runs at 4.8 cycles/limb on the 21064. With infinite unrolling,
|
||||
# it would take 4 cycles/limb. It should be possible to get down to 3
|
||||
# cycles/limb since both ldq and stq can be paired with the other used
|
||||
# instructions. But there are many restrictions in the 21064 pipeline that
|
||||
# make it hard, if not impossible, to get down to 3 cycles/limb:
|
||||
|
||||
# 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
|
||||
# 2. Only aligned instruction pairs can be paired.
|
||||
# 3. The store buffer or silo might not be able to deal with the bandwidth.
|
||||
|
||||
.set noreorder
|
||||
.set noat
|
||||
.text
|
||||
.align 3
|
||||
# __mpn_lshift (21064): shifts the $18-limb vector at $17 (s1_ptr) left by
# $19 (cnt) bits and stores the result through $16 (res_ptr), working from
# the most significant limb downwards. $0 receives the bits shifted out of
# the most significant limb.
.globl __mpn_lshift
|
||||
.ent __mpn_lshift
|
||||
__mpn_lshift:
|
||||
.frame $30,0,$26,0
|
||||
|
||||
s8addq $18,$17,$17 # make r17 point at end of s1
|
||||
ldq $4,-8($17) # load first limb
|
||||
subq $17,8,$17
|
||||
# $7 = 0 - cnt. Alpha shifts use only the low 6 bits of the count, so
# shifting right by $7 acts as a shift by 64-cnt.
subq $31,$19,$7
|
||||
s8addq $18,$16,$16 # make r16 point at end of RES
|
||||
subq $18,1,$18
|
||||
and $18,4-1,$20 # number of limbs in first loop
|
||||
srl $4,$7,$0 # compute function result
|
||||
|
||||
beq $20,L0
|
||||
subq $18,$20,$18
|
||||
|
||||
.align 3
|
||||
# First loop: one result limb per iteration, $20 iterations. Each result limb
# is (current limb << cnt) | (next lower limb >> (64-cnt)).
Loop0:
|
||||
ldq $3,-8($17)
|
||||
subq $16,8,$16
|
||||
subq $17,8,$17
|
||||
subq $20,1,$20
|
||||
sll $4,$19,$5
|
||||
srl $3,$7,$6
|
||||
bis $3,$3,$4
|
||||
bis $5,$6,$8
|
||||
stq $8,0($16)
|
||||
bne $20,Loop0
|
||||
|
||||
L0: beq $18,Lend
|
||||
|
||||
.align 3
|
||||
# Main loop: four result limbs per iteration; $3 and $4 alternate as the
# current and the next lower source limb.
Loop: ldq $3,-8($17)
|
||||
subq $16,32,$16
|
||||
subq $18,4,$18
|
||||
sll $4,$19,$5
|
||||
srl $3,$7,$6
|
||||
|
||||
ldq $4,-16($17)
|
||||
sll $3,$19,$1
|
||||
bis $5,$6,$8
|
||||
stq $8,24($16)
|
||||
srl $4,$7,$2
|
||||
|
||||
ldq $3,-24($17)
|
||||
sll $4,$19,$5
|
||||
bis $1,$2,$8
|
||||
stq $8,16($16)
|
||||
srl $3,$7,$6
|
||||
|
||||
ldq $4,-32($17)
|
||||
sll $3,$19,$1
|
||||
bis $5,$6,$8
|
||||
stq $8,8($16)
|
||||
srl $4,$7,$2
|
||||
|
||||
subq $17,32,$17
|
||||
bis $1,$2,$8
|
||||
stq $8,0($16)
|
||||
|
||||
bgt $18,Loop
|
||||
|
||||
# Least significant result limb: only the left-shifted part exists.
Lend: sll $4,$19,$8
|
||||
stq $8,-8($16)
|
||||
ret $31,($26),1
|
||||
.end __mpn_lshift
|
84
sysdeps/alpha/mul_1.s
Normal file
84
sysdeps/alpha/mul_1.s
Normal file
@ -0,0 +1,84 @@
|
||||
# Alpha 21064 __mpn_mul_1 -- Multiply a limb vector with a limb and store
|
||||
# the result in a second limb vector.
|
||||
|
||||
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Library General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr r16
|
||||
# s1_ptr r17
|
||||
# size r18
|
||||
# s2_limb r19
|
||||
|
||||
# This code runs at 42 cycles/limb on the EV4 and 18 cycles/limb on the EV5.
|
||||
|
||||
# To improve performance for long multiplications, we would use
|
||||
# 'fetch' for S1 and 'fetch_m' for RES. It's not obvious how to use
|
||||
# these instructions without slowing down the general code: 1. We can
|
||||
# only have two prefetches in operation at any time in the Alpha
|
||||
# architecture. 2. There will seldom be any special alignment
|
||||
# between RES_PTR and S1_PTR. Maybe we can simply divide the current
|
||||
# loop into an inner and outer loop, having the inner loop handle
|
||||
# exactly one prefetch block?
|
||||
|
||||
.set noreorder
|
||||
.set noat
|
||||
.text
|
||||
.align 3
|
||||
# __mpn_mul_1: multiplies the $18-limb vector at $17 (s1_ptr) by the single
# limb in $19 (s2_limb) and stores the low product limbs through $16
# (res_ptr). The value left in $0 at the ret is the most significant
# (carry-out) limb of the product.
.globl __mpn_mul_1
|
||||
.ent __mpn_mul_1 2
|
||||
__mpn_mul_1:
|
||||
.frame $30,0,$26
|
||||
|
||||
ldq $2,0($17) # $2 = s1_limb
|
||||
subq $18,1,$18 # size--
|
||||
mulq $2,$19,$3 # $3 = prod_low
|
||||
bic $31,$31,$4 # clear cy_limb
|
||||
umulh $2,$19,$0 # $0 = prod_high
|
||||
beq $18,Lend1 # jump if size was == 1
|
||||
ldq $2,8($17) # $2 = s1_limb
|
||||
subq $18,1,$18 # size--
|
||||
stq $3,0($16)
|
||||
beq $18,Lend2 # jump if size was == 2
|
||||
|
||||
.align 3
|
||||
# Loop invariant: $2 holds the limb to multiply now, $0 + $4 is the carry
# limb carried over from the previous product. The next limb is loaded
# (16($17)) before s1_ptr is advanced, and results are stored at 8($16)
# before res_ptr is advanced.
Loop: mulq $2,$19,$3 # $3 = prod_low
|
||||
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
|
||||
subq $18,1,$18 # size--
|
||||
umulh $2,$19,$4 # $4 = cy_limb
|
||||
ldq $2,16($17) # $2 = s1_limb
|
||||
addq $17,8,$17 # s1_ptr++
|
||||
addq $3,$0,$3 # $3 = cy_limb + prod_low
|
||||
stq $3,8($16)
|
||||
cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
|
||||
addq $16,8,$16 # res_ptr++
|
||||
bne $18,Loop
|
||||
|
||||
# Last limb: same step as the loop body without the load of a further limb.
Lend2: mulq $2,$19,$3 # $3 = prod_low
|
||||
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
|
||||
umulh $2,$19,$4 # $4 = cy_limb
|
||||
addq $3,$0,$3 # $3 = cy_limb + prod_low
|
||||
cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
|
||||
stq $3,8($16)
|
||||
addq $4,$0,$0 # cy_limb = prod_high + cy
|
||||
ret $31,($26),1
|
||||
# size == 1: store the single low product limb; $0 already holds prod_high.
Lend1: stq $3,0($16)
|
||||
ret $31,($26),1
|
||||
|
||||
.end __mpn_mul_1
|
106
sysdeps/alpha/rshift.s
Normal file
106
sysdeps/alpha/rshift.s
Normal file
@ -0,0 +1,106 @@
|
||||
# Alpha 21064 __mpn_rshift --
|
||||
|
||||
# Copyright (C) 1994, 1995 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Library General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr r16
|
||||
# s1_ptr r17
|
||||
# size r18
|
||||
# cnt r19
|
||||
|
||||
# This code runs at 4.8 cycles/limb on the 21064. With infinite unrolling,
|
||||
# it would take 4 cycles/limb. It should be possible to get down to 3
|
||||
# cycles/limb since both ldq and stq can be paired with the other used
|
||||
# instructions. But there are many restrictions in the 21064 pipeline that
|
||||
# make it hard, if not impossible, to get down to 3 cycles/limb:
|
||||
|
||||
# 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
|
||||
# 2. Only aligned instruction pairs can be paired.
|
||||
# 3. The store buffer or silo might not be able to deal with the bandwidth.
|
||||
|
||||
.set noreorder
|
||||
.set noat
|
||||
.text
|
||||
.align 3
|
||||
# __mpn_rshift (21064): shifts the $18-limb vector at $17 (s1_ptr) right by
# $19 (cnt) bits and stores the result through $16 (res_ptr), working from
# the least significant limb upwards. $0 receives the bits shifted out of
# the least significant limb.
.globl __mpn_rshift
|
||||
.ent __mpn_rshift
|
||||
__mpn_rshift:
|
||||
.frame $30,0,$26,0
|
||||
|
||||
ldq $4,0($17) # load first limb
|
||||
addq $17,8,$17
|
||||
# $7 = 0 - cnt. Alpha shifts use only the low 6 bits of the count, so
# shifting left by $7 acts as a shift by 64-cnt.
subq $31,$19,$7
|
||||
subq $18,1,$18
|
||||
and $18,4-1,$20 # number of limbs in first loop
|
||||
sll $4,$7,$0 # compute function result
|
||||
|
||||
beq $20,L0
|
||||
subq $18,$20,$18
|
||||
|
||||
.align 3
|
||||
# First loop: one result limb per iteration, $20 iterations. Each result limb
# is (current limb >> cnt) | (next higher limb << (64-cnt)).
Loop0:
|
||||
ldq $3,0($17)
|
||||
addq $16,8,$16
|
||||
addq $17,8,$17
|
||||
subq $20,1,$20
|
||||
srl $4,$19,$5
|
||||
sll $3,$7,$6
|
||||
bis $3,$3,$4
|
||||
bis $5,$6,$8
|
||||
stq $8,-8($16)
|
||||
bne $20,Loop0
|
||||
|
||||
L0: beq $18,Lend
|
||||
|
||||
.align 3
|
||||
# Main loop: four result limbs per iteration; $3 and $4 alternate as the
# current and the next higher source limb.
Loop: ldq $3,0($17)
|
||||
addq $16,32,$16
|
||||
subq $18,4,$18
|
||||
srl $4,$19,$5
|
||||
sll $3,$7,$6
|
||||
|
||||
ldq $4,8($17)
|
||||
srl $3,$19,$1
|
||||
bis $5,$6,$8
|
||||
stq $8,-32($16)
|
||||
sll $4,$7,$2
|
||||
|
||||
ldq $3,16($17)
|
||||
srl $4,$19,$5
|
||||
bis $1,$2,$8
|
||||
stq $8,-24($16)
|
||||
sll $3,$7,$6
|
||||
|
||||
ldq $4,24($17)
|
||||
srl $3,$19,$1
|
||||
bis $5,$6,$8
|
||||
stq $8,-16($16)
|
||||
sll $4,$7,$2
|
||||
|
||||
addq $17,32,$17
|
||||
bis $1,$2,$8
|
||||
stq $8,-8($16)
|
||||
|
||||
bgt $18,Loop
|
||||
|
||||
# Most significant result limb: only the right-shifted part exists.
Lend: srl $4,$19,$8
|
||||
stq $8,0($16)
|
||||
ret $31,($26),1
|
||||
.end __mpn_rshift
|
119
sysdeps/alpha/sub_n.s
Normal file
119
sysdeps/alpha/sub_n.s
Normal file
@ -0,0 +1,119 @@
|
||||
# Alpha __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
|
||||
# store difference in a third limb vector.
|
||||
|
||||
# Copyright (C) 1995 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Library General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr $16
|
||||
# s1_ptr $17
|
||||
# s2_ptr $18
|
||||
# size $19
|
||||
|
||||
.set noreorder
|
||||
.set noat
|
||||
.text
|
||||
.align 3
|
||||
# __mpn_sub_n: subtracts the limb vector at $18 (s2_ptr) from the limb vector
# at $17 (s1_ptr), $19 (size) limbs each, storing every difference limb
# through $16 (res_ptr). $0 carries the borrow between limbs and holds the
# final borrow at the ret; $1 is a scratch flag.
.globl __mpn_sub_n
|
||||
.ent __mpn_sub_n
|
||||
__mpn_sub_n:
|
||||
.frame $30,0,$26,0
|
||||
|
||||
# Preload the first limb of each operand; both loops always work on limbs
# that were loaded one step earlier.
ldq $3,0($17)
|
||||
ldq $4,0($18)
|
||||
|
||||
subq $19,1,$19
|
||||
and $19,4-1,$2 # number of limbs in first loop
|
||||
# $31 reads as zero on Alpha, so this clears the borrow in $0.
bis $31,$31,$0
|
||||
beq $2,.L0 # if multiple of 4 limbs, skip first loop
|
||||
|
||||
subq $19,$2,$19
|
||||
|
||||
# First loop: one limb per iteration, $2 iterations.
# Borrow scheme: add the incoming borrow to the s2 limb (overflow flag in
# $1), subtract that from the s1 limb (borrow flag in $0), then OR the two
# flags into $0.
.Loop0: subq $2,1,$2
|
||||
ldq $5,8($17)
|
||||
addq $4,$0,$4
|
||||
ldq $6,8($18)
|
||||
cmpult $4,$0,$1
|
||||
subq $3,$4,$4
|
||||
cmpult $3,$4,$0
|
||||
stq $4,0($16)
|
||||
or $0,$1,$0
|
||||
|
||||
addq $17,8,$17
|
||||
addq $18,8,$18
|
||||
# Move the limbs loaded above into $3/$4 for the next step.
bis $5,$5,$3
|
||||
bis $6,$6,$4
|
||||
addq $16,8,$16
|
||||
bne $2,.Loop0
|
||||
|
||||
.L0: beq $19,.Lend
|
||||
|
||||
.align 3
|
||||
# Main loop: four limbs per iteration, alternating between the register
# pairs $3/$4 and $5/$6 so the next pair loads while the current one is used.
.Loop: subq $19,4,$19
|
||||
|
||||
ldq $5,8($17)
|
||||
addq $4,$0,$4
|
||||
ldq $6,8($18)
|
||||
cmpult $4,$0,$1
|
||||
subq $3,$4,$4
|
||||
cmpult $3,$4,$0
|
||||
stq $4,0($16)
|
||||
or $0,$1,$0
|
||||
|
||||
ldq $3,16($17)
|
||||
addq $6,$0,$6
|
||||
ldq $4,16($18)
|
||||
cmpult $6,$0,$1
|
||||
subq $5,$6,$6
|
||||
cmpult $5,$6,$0
|
||||
stq $6,8($16)
|
||||
or $0,$1,$0
|
||||
|
||||
ldq $5,24($17)
|
||||
addq $4,$0,$4
|
||||
ldq $6,24($18)
|
||||
cmpult $4,$0,$1
|
||||
subq $3,$4,$4
|
||||
cmpult $3,$4,$0
|
||||
stq $4,16($16)
|
||||
or $0,$1,$0
|
||||
|
||||
ldq $3,32($17)
|
||||
addq $6,$0,$6
|
||||
ldq $4,32($18)
|
||||
cmpult $6,$0,$1
|
||||
subq $5,$6,$6
|
||||
cmpult $5,$6,$0
|
||||
stq $6,24($16)
|
||||
or $0,$1,$0
|
||||
|
||||
addq $17,32,$17
|
||||
addq $18,32,$18
|
||||
addq $16,32,$16
|
||||
bne $19,.Loop
|
||||
|
||||
# Final limb: $3/$4 already hold the last pair; subtract with the borrow and
# leave the borrow-out in $0.
.Lend: addq $4,$0,$4
|
||||
cmpult $4,$0,$1
|
||||
subq $3,$4,$4
|
||||
cmpult $3,$4,$0
|
||||
stq $4,0($16)
|
||||
or $0,$1,$0
|
||||
ret $31,($26),1
|
||||
|
||||
.end __mpn_sub_n
|
100
sysdeps/alpha/submul_1.s
Normal file
100
sysdeps/alpha/submul_1.s
Normal file
@ -0,0 +1,100 @@
|
||||
# Alpha 21064 __mpn_submul_1 -- Multiply a limb vector with a limb and
|
||||
# subtract the result from a second limb vector.
|
||||
|
||||
# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Library General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr r16
|
||||
# s1_ptr r17
|
||||
# size r18
|
||||
# s2_limb r19
|
||||
|
||||
# This code runs at 42 cycles/limb on the 21064.
|
||||
|
||||
# To improve performance for long multiplications, we would use
|
||||
# 'fetch' for S1 and 'fetch_m' for RES. It's not obvious how to use
|
||||
# these instructions without slowing down the general code: 1. We can
|
||||
# only have two prefetches in operation at any time in the Alpha
|
||||
# architecture. 2. There will seldom be any special alignment
|
||||
# between RES_PTR and S1_PTR. Maybe we can simply divide the current
|
||||
# loop into an inner and outer loop, having the inner loop handle
|
||||
# exactly one prefetch block?
|
||||
|
||||
.set noreorder
|
||||
.set noat
|
||||
.text
|
||||
.align 3
|
||||
# __mpn_submul_1: multiplies the $18-limb vector at $17 (s1_ptr) by the limb
# in $19 (s2_limb) and subtracts the product, limb by limb, from the vector
# at $16 (res_ptr), updating it in place. The value left in $0 at the ret is
# the combined carry/borrow limb out of the most significant position.
.globl __mpn_submul_1
|
||||
.ent __mpn_submul_1 2
|
||||
__mpn_submul_1:
|
||||
.frame $30,0,$26
|
||||
|
||||
ldq $2,0($17) # $2 = s1_limb
|
||||
addq $17,8,$17 # s1_ptr++
|
||||
subq $18,1,$18 # size--
|
||||
mulq $2,$19,$3 # $3 = prod_low
|
||||
ldq $5,0($16) # $5 = *res_ptr
|
||||
umulh $2,$19,$0 # $0 = prod_high
|
||||
beq $18,Lend1 # jump if size was == 1
|
||||
ldq $2,0($17) # $2 = s1_limb
|
||||
addq $17,8,$17 # s1_ptr++
|
||||
subq $18,1,$18 # size--
|
||||
# First limb: *res_ptr - prod_low; $4 = borrow out of that subtraction.
subq $5,$3,$3
|
||||
cmpult $5,$3,$4
|
||||
stq $3,0($16)
|
||||
addq $16,8,$16 # res_ptr++
|
||||
beq $18,Lend2 # jump if size was == 2
|
||||
|
||||
.align 3
|
||||
# Loop invariant: $2 holds the limb to multiply now, $0 + $4 is the carry
# limb carried over from the previous step.
Loop: mulq $2,$19,$3 # $3 = prod_low
|
||||
ldq $5,0($16) # $5 = *res_ptr
|
||||
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
|
||||
subq $18,1,$18 # size--
|
||||
umulh $2,$19,$4 # $4 = cy_limb
|
||||
ldq $2,0($17) # $2 = s1_limb
|
||||
addq $17,8,$17 # s1_ptr++
|
||||
addq $3,$0,$3 # $3 = cy_limb + prod_low
|
||||
cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
|
||||
# Subtract from the result limb; $5 becomes the borrow of that subtraction.
subq $5,$3,$3
|
||||
cmpult $5,$3,$5
|
||||
stq $3,0($16)
|
||||
addq $16,8,$16 # res_ptr++
|
||||
addq $5,$0,$0 # combine carries
|
||||
bne $18,Loop
|
||||
|
||||
# Last limb: same step as the loop body without the load of a further limb.
Lend2: mulq $2,$19,$3 # $3 = prod_low
|
||||
ldq $5,0($16) # $5 = *res_ptr
|
||||
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
|
||||
umulh $2,$19,$4 # $4 = cy_limb
|
||||
addq $3,$0,$3 # $3 = cy_limb + prod_low
|
||||
cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
|
||||
subq $5,$3,$3
|
||||
cmpult $5,$3,$5
|
||||
stq $3,0($16)
|
||||
addq $5,$0,$0 # combine carries
|
||||
addq $4,$0,$0 # cy_limb = prod_high + cy
|
||||
ret $31,($26),1
|
||||
# size == 1: subtract the single low product limb and add the resulting
# borrow to prod_high in $0.
Lend1: subq $5,$3,$3
|
||||
cmpult $5,$3,$5
|
||||
stq $3,0($16)
|
||||
addq $0,$5,$0
|
||||
ret $31,($26),1
|
||||
|
||||
.end __mpn_submul_1
|
@ -134,7 +134,7 @@ Loop2: cmplt n0,0,tmp
|
||||
ret $31,($26),1
|
||||
|
||||
Odd:
|
||||
/* q' in n0. r' in n1. */
|
||||
/* q' in n0. r' in n1 */
|
||||
addq n1,n0,n1
|
||||
cmpult n1,n0,tmp # tmp := carry from addq
|
||||
beq tmp,LLp6
|
||||
|
@ -83,14 +83,12 @@ __mpn_divmod_1 (quot_ptr, dividend_ptr, dividend_size, divisor_limb)
|
||||
result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
|
||||
most significant bit (with weight 2**N) implicit. */
|
||||
|
||||
#if 0 /* This can't happen when normalization_steps != 0 */
|
||||
/* Special case for DIVISOR_LIMB == 100...000. */
|
||||
if (divisor_limb << 1 == 0)
|
||||
divisor_limb_inverted = ~(mp_limb) 0;
|
||||
else
|
||||
#endif
|
||||
udiv_qrnnd (divisor_limb_inverted, dummy,
|
||||
-divisor_limb, 0, divisor_limb);
|
||||
udiv_qrnnd (divisor_limb_inverted, dummy,
|
||||
-divisor_limb, 0, divisor_limb);
|
||||
|
||||
n1 = dividend_ptr[dividend_size - 1];
|
||||
r = n1 >> (BITS_PER_MP_LIMB - normalization_steps);
|
||||
|
@ -3,8 +3,6 @@
|
||||
Return the single-limb remainder.
|
||||
There are no constraints on the value of the divisor.
|
||||
|
||||
QUOT_PTR and DIVIDEND_PTR might point to the same limb.
|
||||
|
||||
Copyright (C) 1991, 1993, 1994, Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU MP Library.
|
||||
|
57
sysdeps/hppa/add_n.s
Normal file
57
sysdeps/hppa/add_n.s
Normal file
@ -0,0 +1,57 @@
|
||||
; HP-PA __mpn_add_n -- Add two limb vectors of the same length > 0 and store
|
||||
; sum in a third limb vector.
|
||||
|
||||
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||
|
||||
; This file is part of the GNU MP Library.
|
||||
|
||||
; The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
; it under the terms of the GNU Library General Public License as published by
|
||||
; the Free Software Foundation; either version 2 of the License, or (at your
|
||||
; option) any later version.
|
||||
|
||||
; The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
; License for more details.
|
||||
|
||||
; You should have received a copy of the GNU Library General Public License
|
||||
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
; INPUT PARAMETERS
|
||||
; res_ptr gr26
|
||||
; s1_ptr gr25
|
||||
; s2_ptr gr24
|
||||
; size gr23
|
||||
|
||||
; One might want to unroll this as for other processors, but it turns
|
||||
; out that the data cache contention after a store makes such
|
||||
; unrolling useless. We can't come under 5 cycles/limb anyway.
|
||||
|
||||
.code
|
||||
; __mpn_add_n: adds the limb vectors at %r25 (s1_ptr) and %r24 (s2_ptr),
; %r23 (size) limbs each, storing the sum limbs through %r26 (res_ptr).
; %r28 holds the sum limb waiting to be stored and, at the return, the
; carry out of the last addition.
.export __mpn_add_n
|
||||
__mpn_add_n
|
||||
.proc
|
||||
.callinfo frame=0,no_calls
|
||||
.entry
|
||||
|
||||
; Load the first limb of each operand, post-incrementing both pointers.
ldws,ma 4(0,%r25),%r20
|
||||
ldws,ma 4(0,%r24),%r19
|
||||
|
||||
addib,= -1,%r23,L$end ; check for (SIZE == 1)
|
||||
; The add below sits in the delay slot of the addib above, so it runs
; whether or not the branch is taken.
add %r20,%r19,%r28 ; add first limbs ignoring cy
|
||||
|
||||
; Loop: load the next pair of limbs, store the previous sum, then add the
; new pair with carry (the addc is in the delay slot of the addib).
L$loop ldws,ma 4(0,%r25),%r20
|
||||
ldws,ma 4(0,%r24),%r19
|
||||
stws,ma %r28,4(0,%r26)
|
||||
addib,<> -1,%r23,L$loop
|
||||
addc %r20,%r19,%r28
|
||||
|
||||
; Store the last sum limb; the addc in the delay slot of the return branch
; turns the pending carry into the value of %r28.
L$end stws %r28,0(0,%r26)
|
||||
bv 0(%r2)
|
||||
addc %r0,%r0,%r28
|
||||
|
||||
.exit
|
||||
.procend
|
101
sysdeps/hppa/hppa1.1/addmul_1.s
Normal file
101
sysdeps/hppa/hppa1.1/addmul_1.s
Normal file
@ -0,0 +1,101 @@
|
||||
; HP-PA-1.1 __mpn_addmul_1 -- Multiply a limb vector with a limb and
|
||||
; add the result to a second limb vector.
|
||||
|
||||
; Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
|
||||
|
||||
; This file is part of the GNU MP Library.
|
||||
|
||||
; The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
; it under the terms of the GNU Library General Public License as published by
|
||||
; the Free Software Foundation; either version 2 of the License, or (at your
|
||||
; option) any later version.
|
||||
|
||||
; The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
; License for more details.
|
||||
|
||||
; You should have received a copy of the GNU Library General Public License
|
||||
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
; INPUT PARAMETERS
|
||||
; res_ptr r26
|
||||
; s1_ptr r25
|
||||
; size r24
|
||||
; s2_limb r23
|
||||
|
||||
; This runs at 11 cycles/limb on a PA7000. With the used instructions, it
|
||||
; can not become faster due to data cache contention after a store. On the
|
||||
; PA7100 it runs at 10 cycles/limb, and that can not be improved either,
|
||||
; since only the xmpyu does not need the integer pipeline, so the only
|
||||
; dual-issue we will get are addc+xmpyu. Unrolling could gain a cycle/limb
|
||||
; on the PA7100.
|
||||
|
||||
; There are some ideas described in mul_1.s that apply to this code too.
|
||||
|
||||
.code
|
||||
; __mpn_addmul_1: multiplies the %r24-limb vector at %r25 (s1_ptr) by the
; limb in %r23 (s2_limb) and adds the product, limb by limb, to the vector
; at %r26 (res_ptr), updating it in place. %r28 holds the carry limb at the
; return.
; A 64-byte stack frame is used as scratch space: each xmpyu result is
; stored with fstds at -16(%r30) and read back as two words, the low word
; from -12(%r30) and the high word from -16(%r30).
.export __mpn_addmul_1
|
||||
__mpn_addmul_1
|
||||
.proc
|
||||
.callinfo frame=64,no_calls
|
||||
.entry
|
||||
|
||||
ldo 64(%r30),%r30
|
||||
fldws,ma 4(%r25),%fr5
|
||||
stw %r23,-16(%r30) ; move s2_limb ...
|
||||
addib,= -1,%r24,L$just_one_limb
|
||||
; The fldws below sits in the delay slot of the addib above, so %fr4 is
; loaded on both paths.
fldws -16(%r30),%fr4 ; ... into fr4
|
||||
add %r0,%r0,%r0 ; clear carry
|
||||
xmpyu %fr4,%fr5,%fr6
|
||||
fldws,ma 4(%r25),%fr7
|
||||
fstds %fr6,-16(%r30)
|
||||
xmpyu %fr4,%fr7,%fr8
|
||||
ldw -12(%r30),%r19 ; least significant limb in product
|
||||
ldw -16(%r30),%r28
|
||||
|
||||
fstds %fr8,-16(%r30)
|
||||
addib,= -1,%r24,L$end
|
||||
ldw -12(%r30),%r1
|
||||
|
||||
; Main loop
|
||||
; Per iteration: add the pending low product word (%r19) to *res_ptr and
; store it, then form the next pending word from the previous high word
; (%r28) plus the new low word (%r1), propagating the carry with addc.
L$loop ldws 0(%r26),%r29
|
||||
fldws,ma 4(%r25),%fr5
|
||||
add %r29,%r19,%r19
|
||||
stws,ma %r19,4(%r26)
|
||||
addc %r28,%r1,%r19
|
||||
xmpyu %fr4,%fr5,%fr6
|
||||
ldw -16(%r30),%r28
|
||||
fstds %fr6,-16(%r30)
|
||||
addc %r0,%r28,%r28
|
||||
addib,<> -1,%r24,L$loop
|
||||
ldw -12(%r30),%r1
|
||||
|
||||
; Drain the two products still in flight, then return; the ldo in the delay
; slot of the bv releases the stack frame.
L$end ldw 0(%r26),%r29
|
||||
add %r29,%r19,%r19
|
||||
stws,ma %r19,4(%r26)
|
||||
addc %r28,%r1,%r19
|
||||
ldw -16(%r30),%r28
|
||||
ldws 0(%r26),%r29
|
||||
addc %r0,%r28,%r28
|
||||
add %r29,%r19,%r19
|
||||
stws,ma %r19,4(%r26)
|
||||
addc %r0,%r28,%r28
|
||||
bv 0(%r2)
|
||||
ldo -64(%r30),%r30
|
||||
|
||||
; size == 1: a single product; add its low word to *res_ptr and return the
; high word plus carry in %r28.
L$just_one_limb
|
||||
xmpyu %fr4,%fr5,%fr6
|
||||
ldw 0(%r26),%r29
|
||||
fstds %fr6,-16(%r30)
|
||||
ldw -12(%r30),%r1
|
||||
ldw -16(%r30),%r28
|
||||
add %r29,%r1,%r19
|
||||
stw %r19,0(%r26)
|
||||
addc %r0,%r28,%r28
|
||||
bv 0(%r2)
|
||||
ldo -64(%r30),%r30
|
||||
|
||||
.exit
|
||||
.procend
|
97
sysdeps/hppa/hppa1.1/mul_1.s
Normal file
97
sysdeps/hppa/hppa1.1/mul_1.s
Normal file
@ -0,0 +1,97 @@
|
||||
; HP-PA-1.1 __mpn_mul_1 -- Multiply a limb vector with a limb and store
|
||||
; the result in a second limb vector.
|
||||
|
||||
; Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
|
||||
|
||||
; This file is part of the GNU MP Library.
|
||||
|
||||
; The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
; it under the terms of the GNU Library General Public License as published by
|
||||
; the Free Software Foundation; either version 2 of the License, or (at your
|
||||
; option) any later version.
|
||||
|
||||
; The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
; License for more details.
|
||||
|
||||
; You should have received a copy of the GNU Library General Public License
|
||||
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
; INPUT PARAMETERS
|
||||
; res_ptr r26
|
||||
; s1_ptr r25
|
||||
; size r24
|
||||
; s2_limb r23
|
||||
|
||||
; This runs at 9 cycles/limb on a PA7000. With the instructions used, it can
|
||||
; not become faster due to data cache contention after a store. On the
|
||||
; PA7100 it runs at 7 cycles/limb, and that can not be improved either, since
|
||||
; only the xmpyu does not need the integer pipeline, so the only dual-issue
|
||||
; we will get are addc+xmpyu. Unrolling would not help either CPU.
|
||||
|
||||
; We could use fldds to read two limbs at a time from the S1 array, and that
|
||||
; could bring down the times to 8.5 and 6.5 cycles/limb for the PA7000 and
|
||||
; PA7100, respectively. We don't do that since it does not seem worth the
|
||||
; (alignment) troubles...
|
||||
|
||||
; At least the PA7100 is rumored to be able to deal with cache-misses
|
||||
; without stalling instruction issue. If this is true, and the cache is
|
||||
; actually also lockup-free, we should use a deeper software pipeline, and
|
||||
; load from S1 very early! (The loads and stores to -12(sp) will surely be
|
||||
; in the cache.)
|
||||
|
||||
.code
|
||||
.export __mpn_mul_1
|
||||
__mpn_mul_1 ; multiply the size limbs at s1_ptr by s2_limb and store the low limbs of the running product at res_ptr
|
||||
.proc
|
||||
.callinfo frame=64,no_calls
|
||||
.entry
|
||||
|
||||
ldo 64(%r30),%r30 ; reserve the 64-byte frame declared in .callinfo
|
||||
fldws,ma 4(%r25),%fr5 ; fetch first source limb, post-incrementing s1_ptr
|
||||
stw %r23,-16(%r30) ; move s2_limb ...
|
||||
addib,= -1,%r24,L$just_one_limb ; size -= 1; taken when only one limb was requested (next line is the delay slot)
|
||||
fldws -16(%r30),%fr4 ; ... into fr4
|
||||
add %r0,%r0,%r0 ; clear carry
|
||||
xmpyu %fr4,%fr5,%fr6 ; 64-bit product of s2_limb and the first limb
|
||||
fldws,ma 4(%r25),%fr7 ; fetch second source limb
|
||||
fstds %fr6,-16(%r30) ; spill the product to the frame so integer loads can read it
|
||||
xmpyu %fr4,%fr7,%fr8 ; product for the second limb
|
||||
ldw -12(%r30),%r19 ; least significant limb in product
|
||||
ldw -16(%r30),%r28 ; most significant limb in product
|
||||
|
||||
fstds %fr8,-16(%r30) ; spill the second product over the same slot
|
||||
addib,= -1,%r24,L$end ; size -= 1; exactly two limbs skips the loop
|
||||
ldw -12(%r30),%r1 ; (delay slot) low limb of the newest product
|
||||
|
||||
; Main loop
|
||||
L$loop fldws,ma 4(%r25),%fr5 ; fetch the next source limb
|
||||
stws,ma %r19,4(%r26) ; store the pending result limb and advance res_ptr
|
||||
addc %r28,%r1,%r19 ; next result limb = previous high + newest low + carry
|
||||
xmpyu %fr4,%fr5,%fr6 ; start the following product
|
||||
ldw -16(%r30),%r28 ; high limb of the product spilled last time round
|
||||
fstds %fr6,-16(%r30) ; only now overwrite the slot with the new product
|
||||
addib,<> -1,%r24,L$loop ; size -= 1; loop while limbs remain
|
||||
ldw -12(%r30),%r1 ; (delay slot) low limb of the newest product
|
||||
|
||||
L$end stws,ma %r19,4(%r26) ; drain the pipeline: store the pending result limb
|
||||
addc %r28,%r1,%r19 ; last result limb = previous high + last low + carry
|
||||
ldw -16(%r30),%r28 ; high limb of the last product
|
||||
stws,ma %r19,4(%r26) ; store the last result limb
|
||||
addc %r0,%r28,%r28 ; fold the final carry into %r28 -- presumably the carry-out limb handed back to the caller (confirm against the calling convention)
|
||||
bv 0(%r2) ; return through %r2
|
||||
ldo -64(%r30),%r30 ; (delay slot) release the frame
|
||||
|
||||
L$just_one_limb
|
||||
xmpyu %fr4,%fr5,%fr6 ; the single product
|
||||
fstds %fr6,-16(%r30) ; spill it so the high word can be read back
|
||||
ldw -16(%r30),%r28 ; high word of the product stays in %r28
|
||||
ldo -64(%r30),%r30 ; release the frame
|
||||
bv 0(%r2) ; return through %r2
|
||||
fstws %fr6R,0(%r26) ; (delay slot) store the low word of the product at res_ptr
|
||||
|
||||
.exit
|
||||
.procend
|
110
sysdeps/hppa/hppa1.1/submul_1.s
Normal file
110
sysdeps/hppa/hppa1.1/submul_1.s
Normal file
@ -0,0 +1,110 @@
|
||||
; HP-PA-1.1 __mpn_submul_1 -- Multiply a limb vector with a limb and
|
||||
; subtract the result from a second limb vector.
|
||||
|
||||
; Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
|
||||
|
||||
; This file is part of the GNU MP Library.
|
||||
|
||||
; The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
; it under the terms of the GNU Library General Public License as published by
|
||||
; the Free Software Foundation; either version 2 of the License, or (at your
|
||||
; option) any later version.
|
||||
|
||||
; The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
; License for more details.
|
||||
|
||||
; You should have received a copy of the GNU Library General Public License
|
||||
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
; INPUT PARAMETERS
|
||||
; res_ptr r26
|
||||
; s1_ptr r25
|
||||
; size r24
|
||||
; s2_limb r23
|
||||
|
||||
; This runs at 12 cycles/limb on a PA7000. With the instructions used, it
|
||||
; can not become faster due to data cache contention after a store. On the
|
||||
; PA7100 it runs at 11 cycles/limb, and that can not be improved either,
|
||||
; since only the xmpyu does not need the integer pipeline, so the only
|
||||
; dual-issue we will get are addc+xmpyu. Unrolling could gain a cycle/limb
|
||||
; on the PA7100.
|
||||
|
||||
; Some of the ideas described in mul_1.s apply to this code too.
|
||||
|
||||
; It seems possible to make this run as fast as __mpn_addmul_1, if we use
|
||||
; sub,>>= %r29,%r19,%r22
|
||||
; addi 1,%r28,%r28
|
||||
; but that requires reworking the hairy software pipeline...
|
||||
|
||||
.code
|
||||
.export __mpn_submul_1
|
||||
__mpn_submul_1 ; multiply the size limbs at s1_ptr by s2_limb and subtract the products from the limbs at res_ptr
|
||||
.proc
|
||||
.callinfo frame=64,no_calls
|
||||
.entry
|
||||
|
||||
ldo 64(%r30),%r30 ; reserve the 64-byte frame declared in .callinfo
|
||||
fldws,ma 4(%r25),%fr5 ; fetch first source limb, post-incrementing s1_ptr
|
||||
stw %r23,-16(%r30) ; move s2_limb ...
|
||||
addib,= -1,%r24,L$just_one_limb ; size -= 1; taken when only one limb was requested (next line is the delay slot)
|
||||
fldws -16(%r30),%fr4 ; ... into fr4
|
||||
add %r0,%r0,%r0 ; clear carry
|
||||
xmpyu %fr4,%fr5,%fr6 ; 64-bit product of s2_limb and the first limb
|
||||
fldws,ma 4(%r25),%fr7 ; fetch second source limb
|
||||
fstds %fr6,-16(%r30) ; spill the product to the frame so integer loads can read it
|
||||
xmpyu %fr4,%fr7,%fr8 ; product for the second limb
|
||||
ldw -12(%r30),%r19 ; least significant limb in product
|
||||
ldw -16(%r30),%r28 ; most significant limb in product
|
||||
|
||||
fstds %fr8,-16(%r30) ; spill the second product over the same slot
|
||||
addib,= -1,%r24,L$end ; size -= 1; exactly two limbs skips the loop
|
||||
ldw -12(%r30),%r1 ; (delay slot) low limb of the newest product
|
||||
|
||||
; Main loop
|
||||
L$loop ldws 0(%r26),%r29 ; current destination limb
|
||||
fldws,ma 4(%r25),%fr5 ; fetch the next source limb early
|
||||
sub %r29,%r19,%r22 ; destination limb - pending product limb
|
||||
add %r22,%r19,%r0 ; result discarded: re-adding the subtrahend sets carry exactly when the sub borrowed
|
||||
stws,ma %r22,4(%r26) ; store the difference and advance res_ptr
|
||||
addc %r28,%r1,%r19 ; next pending limb = previous high + newest low + borrow
|
||||
xmpyu %fr4,%fr5,%fr6 ; start the following product
|
||||
ldw -16(%r30),%r28 ; high limb of the product spilled last time round
|
||||
fstds %fr6,-16(%r30) ; only now overwrite the slot with the new product
|
||||
addc %r0,%r28,%r28 ; propagate the carry into the high limb
|
||||
addib,<> -1,%r24,L$loop ; size -= 1; loop while limbs remain
|
||||
ldw -12(%r30),%r1 ; (delay slot) low limb of the newest product
|
||||
|
||||
L$end ldw 0(%r26),%r29 ; drain the pipeline: second-to-last destination limb
|
||||
sub %r29,%r19,%r22 ; subtract the pending product limb
|
||||
add %r22,%r19,%r0 ; regenerate the borrow as a carry
|
||||
stws,ma %r22,4(%r26) ; store and advance
|
||||
addc %r28,%r1,%r19 ; last pending limb = previous high + last low + borrow
|
||||
ldw -16(%r30),%r28 ; high limb of the last product
|
||||
ldws 0(%r26),%r29 ; last destination limb
|
||||
addc %r0,%r28,%r28 ; propagate the carry into the high limb
|
||||
sub %r29,%r19,%r22 ; final difference
|
||||
add %r22,%r19,%r0 ; regenerate the borrow as a carry
|
||||
stws,ma %r22,4(%r26) ; store the last result limb
|
||||
addc %r0,%r28,%r28 ; fold the final borrow into %r28 -- presumably the borrow-out limb handed back to the caller (confirm against the calling convention)
|
||||
bv 0(%r2) ; return through %r2
|
||||
ldo -64(%r30),%r30 ; (delay slot) release the frame
|
||||
|
||||
L$just_one_limb
|
||||
xmpyu %fr4,%fr5,%fr6 ; the single product
|
||||
ldw 0(%r26),%r29 ; the single destination limb
|
||||
fstds %fr6,-16(%r30) ; spill the product to the frame
|
||||
ldw -12(%r30),%r1 ; low limb of the product
|
||||
ldw -16(%r30),%r28 ; high limb of the product
|
||||
sub %r29,%r1,%r22 ; destination - low limb
|
||||
add %r22,%r1,%r0 ; regenerate the borrow as a carry
|
||||
stw %r22,0(%r26) ; store the result limb
|
||||
addc %r0,%r28,%r28 ; high limb + borrow stays in %r28
|
||||
bv 0(%r2) ; return through %r2
|
||||
ldo -64(%r30),%r30 ; (delay slot) release the frame
|
||||
|
||||
.exit
|
||||
.procend
|
74
sysdeps/hppa/hppa1.1/udiv_qrnnd.s
Normal file
74
sysdeps/hppa/hppa1.1/udiv_qrnnd.s
Normal file
@ -0,0 +1,74 @@
|
||||
; HP-PA __udiv_qrnnd division support, used from longlong.h.
|
||||
; This version runs fast on PA 7000 and later.
|
||||
|
||||
; Copyright (C) 1993, 1994 Free Software Foundation, Inc.
|
||||
|
||||
; This file is part of the GNU MP Library.
|
||||
|
||||
; The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
; it under the terms of the GNU Library General Public License as published by
|
||||
; the Free Software Foundation; either version 2 of the License, or (at your
|
||||
; option) any later version.
|
||||
|
||||
; The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
; License for more details.
|
||||
|
||||
; You should have received a copy of the GNU Library General Public License
|
||||
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
; INPUT PARAMETERS
|
||||
; rem_ptr gr26
|
||||
; n1 gr25
|
||||
; n0 gr24
|
||||
; d gr23
|
||||
|
||||
.code
|
||||
L$0000 .word 0x43f00000 ; high word of the IEEE double 2^64
|
||||
.word 0x0 ; low word of the same constant
|
||||
.export __udiv_qrnnd
|
||||
__udiv_qrnnd ; divide the two-word value n1:n0 by d using the FPU; the remainder is stored through rem_ptr and the quotient is left in %r28
|
||||
.proc
|
||||
.callinfo frame=64,no_calls
|
||||
.entry
|
||||
ldo 64(%r30),%r30 ; reserve the 64-byte frame declared in .callinfo
|
||||
|
||||
stws %r25,-16(0,%r30) ; n_hi
|
||||
stws %r24,-12(0,%r30) ; n_lo
|
||||
ldil L'L$0000,%r19 ; left part of the address of the 2^64 constant
|
||||
ldo R'L$0000(%r19),%r19 ; ... completed with the right part
|
||||
fldds -16(0,%r30),%fr5 ; n1:n0 as one 64-bit value
|
||||
stws %r23,-12(0,%r30) ; stage d in the frame for the FP load below
|
||||
comib,<= 0,%r25,L$1 ; no fix-up needed when n1 is non-negative as a signed word
|
||||
fcnvxf,dbl,dbl %fr5,%fr5 ; (delay slot) convert the 64-bit integer to double
|
||||
fldds 0(0,%r19),%fr4 ; load 2^64
|
||||
fadd,dbl %fr4,%fr5,%fr5 ; the conversion read the value as signed: add 2^64 back
|
||||
L$1
|
||||
fcpy,sgl %fr0,%fr6L ; NOTE(review): presumably zeroes the upper half of fr6 so d converts as unsigned -- confirm %fr0 reads as zero here
|
||||
fldws -12(0,%r30),%fr6R ; d into the lower half of fr6
|
||||
fcnvxf,dbl,dbl %fr6,%fr4 ; d as a double
|
||||
|
||||
fdiv,dbl %fr5,%fr4,%fr5 ; floating-point quotient estimate
|
||||
|
||||
fcnvfx,dbl,dbl %fr5,%fr4 ; back to a 64-bit integer
|
||||
fstws %fr4R,-16(%r30) ; low word = candidate quotient
|
||||
xmpyu %fr4R,%fr6R,%fr6 ; candidate quotient * d
|
||||
ldws -16(%r30),%r28 ; candidate quotient into %r28
|
||||
fstds %fr6,-16(0,%r30) ; spill the 64-bit product
|
||||
ldws -12(0,%r30),%r21 ; product, low word
|
||||
ldws -16(0,%r30),%r20 ; product, high word
|
||||
sub %r24,%r21,%r22 ; low word of n - q*d (candidate remainder)
|
||||
subb %r25,%r20,%r19 ; high word of the difference, with borrow
|
||||
comib,= 0,%r19,L$2 ; high word zero: the estimate needs no correction
|
||||
ldo -64(%r30),%r30 ; (delay slot) release the frame
|
||||
|
||||
add %r22,%r23,%r22 ; otherwise add d back to the remainder ...
|
||||
ldo -1(%r28),%r28 ; ... and decrement the quotient
|
||||
L$2 bv 0(%r2) ; return through %r2
|
||||
stws %r22,0(0,%r26) ; (delay slot) store the remainder through rem_ptr
|
||||
|
||||
.exit
|
||||
.procend
|
65
sysdeps/hppa/lshift.s
Normal file
65
sysdeps/hppa/lshift.s
Normal file
@ -0,0 +1,65 @@
|
||||
; HP-PA __mpn_lshift -- Shift a limb vector left by cnt bits.
|
||||
|
||||
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||
|
||||
; This file is part of the GNU MP Library.
|
||||
|
||||
; The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
; it under the terms of the GNU Library General Public License as published by
|
||||
; the Free Software Foundation; either version 2 of the License, or (at your
|
||||
; option) any later version.
|
||||
|
||||
; The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
; License for more details.
|
||||
|
||||
; You should have received a copy of the GNU Library General Public License
|
||||
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
; INPUT PARAMETERS
|
||||
; res_ptr gr26
|
||||
; s_ptr gr25
|
||||
; size gr24
|
||||
; cnt gr23
|
||||
|
||||
.code
|
||||
.export __mpn_lshift
|
||||
__mpn_lshift ; shift the size limbs at s_ptr left by cnt bits into res_ptr, working from the top limb down
|
||||
.proc
|
||||
.callinfo frame=64,no_calls
|
||||
.entry
|
||||
|
||||
sh2add %r24,%r25,%r25 ; s_ptr += 4*size, i.e. just past the top limb
|
||||
sh2add %r24,%r26,%r26 ; res_ptr += 4*size
|
||||
ldws,mb -4(0,%r25),%r22 ; pre-decrement s_ptr and load the top limb
|
||||
subi 32,%r23,%r1 ; vshd shifts right, so use 32 - cnt
|
||||
mtsar %r1 ; install it in the shift amount register
|
||||
addib,= -1,%r24,L$0004 ; size -= 1; single-limb case (next line is the delay slot)
|
||||
vshd %r0,%r22,%r28 ; compute carry out limb
|
||||
ldws,mb -4(0,%r25),%r29 ; next lower limb
|
||||
addib,= -1,%r24,L$0002 ; size -= 1; two-limb case skips the loop
|
||||
vshd %r22,%r29,%r20 ; (delay slot) result limb from two adjacent source limbs
|
||||
|
||||
L$loop ldws,mb -4(0,%r25),%r22 ; loop handles two limbs per pass, alternating %r22 and %r29
|
||||
stws,mb %r20,-4(0,%r26) ; store the pending result limb
|
||||
addib,= -1,%r24,L$0003 ; size -= 1; leave with %r22 as the last limb loaded
|
||||
vshd %r29,%r22,%r20 ; (delay slot) next result limb
|
||||
ldws,mb -4(0,%r25),%r29 ; second half of the pass
|
||||
stws,mb %r20,-4(0,%r26) ; store the pending result limb
|
||||
addib,<> -1,%r24,L$loop ; size -= 1; loop while limbs remain
|
||||
vshd %r22,%r29,%r20 ; (delay slot) next result limb
|
||||
|
||||
L$0002 stws,mb %r20,-4(0,%r26) ; exit with %r29 as the last limb loaded: store the pending limb
|
||||
vshd %r29,%r0,%r20 ; lowest result limb, zeros shifted in from below
|
||||
bv 0(%r2) ; return through %r2
|
||||
stw %r20,-4(0,%r26) ; (delay slot) store the lowest result limb
|
||||
L$0003 stws,mb %r20,-4(0,%r26) ; exit with %r22 as the last limb loaded: store the pending limb
|
||||
L$0004 vshd %r22,%r0,%r20 ; lowest result limb, zeros shifted in from below
|
||||
bv 0(%r2) ; return through %r2
|
||||
stw %r20,-4(0,%r26) ; (delay slot) store the lowest result limb
|
||||
|
||||
.exit
|
||||
.procend
|
62
sysdeps/hppa/rshift.s
Normal file
62
sysdeps/hppa/rshift.s
Normal file
@ -0,0 +1,62 @@
|
||||
; HP-PA __mpn_rshift -- Shift a limb vector right by cnt bits.
|
||||
|
||||
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||
|
||||
; This file is part of the GNU MP Library.
|
||||
|
||||
; The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
; it under the terms of the GNU Library General Public License as published by
|
||||
; the Free Software Foundation; either version 2 of the License, or (at your
|
||||
; option) any later version.
|
||||
|
||||
; The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
; License for more details.
|
||||
|
||||
; You should have received a copy of the GNU Library General Public License
|
||||
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
; INPUT PARAMETERS
|
||||
; res_ptr gr26
|
||||
; s_ptr gr25
|
||||
; size gr24
|
||||
; cnt gr23
|
||||
|
||||
.code
|
||||
.export __mpn_rshift
|
||||
__mpn_rshift ; shift the size limbs at s_ptr right by cnt bits into res_ptr, working from the bottom limb up
|
||||
.proc
|
||||
.callinfo frame=64,no_calls
|
||||
.entry
|
||||
|
||||
ldws,ma 4(0,%r25),%r22 ; load the lowest limb, post-incrementing s_ptr
|
||||
mtsar %r23 ; shift amount register = cnt
|
||||
addib,= -1,%r24,L$0004 ; size -= 1; single-limb case (next line is the delay slot)
|
||||
vshd %r22,%r0,%r28 ; compute carry out limb
|
||||
ldws,ma 4(0,%r25),%r29 ; next higher limb
|
||||
addib,= -1,%r24,L$0002 ; size -= 1; two-limb case skips the loop
|
||||
vshd %r29,%r22,%r20 ; (delay slot) result limb from two adjacent source limbs
|
||||
|
||||
L$loop ldws,ma 4(0,%r25),%r22 ; loop handles two limbs per pass, alternating %r22 and %r29
|
||||
stws,ma %r20,4(0,%r26) ; store the pending result limb and advance res_ptr
|
||||
addib,= -1,%r24,L$0003 ; size -= 1; leave with %r22 as the last limb loaded
|
||||
vshd %r22,%r29,%r20 ; (delay slot) next result limb
|
||||
ldws,ma 4(0,%r25),%r29 ; second half of the pass
|
||||
stws,ma %r20,4(0,%r26) ; store the pending result limb
|
||||
addib,<> -1,%r24,L$loop ; size -= 1; loop while limbs remain
|
||||
vshd %r29,%r22,%r20 ; (delay slot) next result limb
|
||||
|
||||
L$0002 stws,ma %r20,4(0,%r26) ; exit with %r29 as the last limb loaded: store the pending limb
|
||||
vshd %r0,%r29,%r20 ; highest result limb, zeros shifted in from above
|
||||
bv 0(%r2) ; return through %r2
|
||||
stw %r20,0(0,%r26) ; (delay slot) store the highest result limb
|
||||
L$0003 stws,ma %r20,4(0,%r26) ; exit with %r22 as the last limb loaded: store the pending limb
|
||||
L$0004 vshd %r0,%r22,%r20 ; highest result limb, zeros shifted in from above
|
||||
bv 0(%r2) ; return through %r2
|
||||
stw %r20,0(0,%r26) ; (delay slot) store the highest result limb
|
||||
|
||||
.exit
|
||||
.procend
|
58
sysdeps/hppa/sub_n.s
Normal file
58
sysdeps/hppa/sub_n.s
Normal file
@ -0,0 +1,58 @@
|
||||
; HP-PA __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
|
||||
; store difference in a third limb vector.
|
||||
|
||||
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||
|
||||
; This file is part of the GNU MP Library.
|
||||
|
||||
; The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
; it under the terms of the GNU Library General Public License as published by
|
||||
; the Free Software Foundation; either version 2 of the License, or (at your
|
||||
; option) any later version.
|
||||
|
||||
; The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
; License for more details.
|
||||
|
||||
; You should have received a copy of the GNU Library General Public License
|
||||
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
; INPUT PARAMETERS
|
||||
; res_ptr gr26
|
||||
; s1_ptr gr25
|
||||
; s2_ptr gr24
|
||||
; size gr23
|
||||
|
||||
; One might want to unroll this as for other processors, but it turns
|
||||
; out that the data cache contention after a store makes such
|
||||
; unrolling useless. We can't come under 5 cycles/limb anyway.
|
||||
|
||||
.code
|
||||
.export __mpn_sub_n
|
||||
__mpn_sub_n ; res[i] = s1[i] - s2[i] over size limbs with borrow propagation; the borrow out is left in %r28
|
||||
.proc
|
||||
.callinfo frame=0,no_calls
|
||||
.entry
|
||||
|
||||
ldws,ma 4(0,%r25),%r20 ; first s1 limb, post-incrementing s1_ptr
|
||||
ldws,ma 4(0,%r24),%r19 ; first s2 limb, post-incrementing s2_ptr
|
||||
|
||||
addib,= -1,%r23,L$end ; check for (SIZE == 1)
|
||||
sub %r20,%r19,%r28 ; subtract first limbs ignoring cy
|
||||
|
||||
L$loop ldws,ma 4(0,%r25),%r20 ; next s1 limb
|
||||
ldws,ma 4(0,%r24),%r19 ; next s2 limb
|
||||
stws,ma %r28,4(0,%r26) ; store the previous difference and advance res_ptr
|
||||
addib,<> -1,%r23,L$loop ; size -= 1; loop while limbs remain
|
||||
subb %r20,%r19,%r28 ; (delay slot) subtract with the borrow from the previous limb
|
||||
|
||||
L$end stws %r28,0(0,%r26) ; store the last difference
|
||||
addc %r0,%r0,%r28 ; %r28 = carry bit, which is 1 when the last subtract did not borrow
|
||||
bv 0(%r2) ; return through %r2
|
||||
subi 1,%r28,%r28 ; (delay slot) invert it: %r28 = 1 when there was a borrow
|
||||
|
||||
.exit
|
||||
.procend
|
285
sysdeps/hppa/udiv_qrnnd.s
Normal file
285
sysdeps/hppa/udiv_qrnnd.s
Normal file
@ -0,0 +1,285 @@
|
||||
; HP-PA __udiv_qrnnd division support, used from longlong.h.
|
||||
; This version runs fast on pre-PA7000 CPUs.
|
||||
|
||||
; Copyright (C) 1993, 1994 Free Software Foundation, Inc.
|
||||
|
||||
; This file is part of the GNU MP Library.
|
||||
|
||||
; The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
; it under the terms of the GNU Library General Public License as published by
|
||||
; the Free Software Foundation; either version 2 of the License, or (at your
|
||||
; option) any later version.
|
||||
|
||||
; The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
; License for more details.
|
||||
|
||||
; You should have received a copy of the GNU Library General Public License
|
||||
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
; INPUT PARAMETERS
|
||||
; rem_ptr gr26
|
||||
; n1 gr25
|
||||
; n0 gr24
|
||||
; d gr23
|
||||
|
||||
; The code size is a bit excessive. We could merge the last two ds;addc
|
||||
; sequences by simply moving the "bb,< Odd" instruction down. The only
|
||||
; trouble is the FFFFFFFF code that would need some hacking.
|
||||
|
||||
.code
|
||||
.export __udiv_qrnnd
|
||||
__udiv_qrnnd ; divide the two-word value n1:n0 by d with the divide-step instruction; the remainder is stored through rem_ptr and the quotient is left in %r28
|
||||
.proc
|
||||
.callinfo frame=0,no_calls
|
||||
.entry
|
||||
|
||||
comb,< %r23,0,L$largedivisor ; d negative as a signed word (top bit set) takes the halved-divisor paths
|
||||
sub %r0,%r23,%r1 ; clear cy as side-effect
|
||||
ds %r0,%r1,%r0 ; prime the divide-step state from -d (result discarded)
|
||||
|
||||
addc %r24,%r24,%r24 ; shift n0 left one bit, bringing the carry in at the bottom
|
||||
|
||||
ds %r25,%r23,%r25 ; divide step 1 of 32 on the partial remainder in %r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r23,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r23,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r23,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r23,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r23,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r23,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r23,%r25 ; divide step 8 of 32
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r23,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r23,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r23,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r23,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r23,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r23,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r23,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r23,%r25 ; divide step 16 of 32
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r23,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r23,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r23,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r23,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r23,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r23,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r23,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r23,%r25 ; divide step 24 of 32
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r23,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r23,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r23,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r23,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r23,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r23,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r23,%r25
|
||||
|
||||
addc %r24,%r24,%r28 ; from here on the shifted word is kept in %r28
|
||||
|
||||
ds %r25,%r23,%r25 ; divide step 32 of 32
|
||||
|
||||
comclr,>= %r25,%r0,%r0 ; skip the correction when the remainder is non-negative
|
||||
|
||||
addl %r25,%r23,%r25 ; otherwise add d back (carry bit left untouched)
|
||||
|
||||
stws %r25,0(0,%r26) ; store the remainder through rem_ptr
|
||||
|
||||
bv 0(%r2) ; return through %r2
|
||||
|
||||
addc %r28,%r28,%r28 ; (delay slot) shift the final carry into %r28
|
||||
|
||||
|
||||
L$largedivisor
|
||||
|
||||
extru %r24,31,1,%r19 ; r19 = n0 & 1
|
||||
|
||||
bb,< %r23,31,L$odd ; low bit of d set: odd divisors are handled separately
|
||||
|
||||
extru %r23,30,31,%r22 ; r22 = d >> 1
|
||||
|
||||
shd %r25,%r24,1,%r24 ; r24 = new n0
|
||||
|
||||
extru %r25,30,31,%r25 ; r25 = new n1
|
||||
|
||||
sub %r0,%r22,%r21 ; r21 = -(d >> 1)
|
||||
|
||||
ds %r0,%r21,%r0 ; prime the divide-step state from the negated halved divisor
|
||||
|
||||
addc %r24,%r24,%r24 ; shift n0 left one bit, bringing the carry in at the bottom
|
||||
|
||||
ds %r25,%r22,%r25 ; divide step 1 of 32 against d >> 1
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25 ; divide step 8 of 32
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25 ; divide step 16 of 32
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25 ; divide step 24 of 32
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25 ; divide step 32 of 32
|
||||
|
||||
comclr,>= %r25,%r0,%r0 ; skip the correction when the remainder is non-negative
|
||||
|
||||
addl %r25,%r22,%r25 ; otherwise add d >> 1 back (carry bit left untouched)
|
||||
|
||||
sh1addl %r25,%r19,%r25 ; remainder = 2 * r25 + the saved low bit of n0
|
||||
|
||||
stws %r25,0(0,%r26) ; store the remainder through rem_ptr
|
||||
|
||||
bv 0(%r2) ; return through %r2
|
||||
|
||||
addc %r24,%r24,%r28 ; (delay slot) shift the final carry in and leave the result in %r28
|
||||
|
||||
|
||||
L$odd addib,sv,n 1,%r22,L$FF.. ; r22 = (d / 2 + 1)
|
||||
|
||||
shd %r25,%r24,1,%r24 ; r24 = new n0
|
||||
|
||||
extru %r25,30,31,%r25 ; r25 = new n1
|
||||
|
||||
sub %r0,%r22,%r21 ; r21 = -(d / 2 + 1)
|
||||
|
||||
ds %r0,%r21,%r0 ; prime the divide-step state from the negated divisor
|
||||
|
||||
addc %r24,%r24,%r24 ; shift n0 left one bit, bringing the carry in at the bottom
|
||||
|
||||
ds %r25,%r22,%r25 ; divide step 1 of 32 against d / 2 + 1
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25 ; divide step 8 of 32
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25 ; divide step 16 of 32
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25 ; divide step 24 of 32
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25
|
||||
|
||||
addc %r24,%r24,%r24
|
||||
|
||||
ds %r25,%r22,%r25 ; divide step 32 of 32
|
||||
|
||||
addc %r24,%r24,%r28 ; shift the final carry in and leave the result in %r28
|
||||
|
||||
comclr,>= %r25,%r0,%r0 ; skip the correction when the remainder is non-negative
|
||||
|
||||
addl %r25,%r22,%r25 ; otherwise add d / 2 + 1 back (carry bit left untouched)
|
||||
|
||||
sh1addl %r25,%r19,%r25 ; remainder = 2 * r25 + the saved low bit of n0
|
||||
|
||||
; We have computed (n1,,n0) / (d + 1), q' = r28, r' = r25
|
||||
|
||||
add,nuv %r28,%r25,%r25 ; r' += q'; skip the next instruction unless that overflowed
|
||||
|
||||
addl %r25,%r1,%r25 ; r' -= d (%r1 holds -d, computed in the entry delay slot)
|
||||
|
||||
addc %r0,%r28,%r28 ; q' += carry from the add above
|
||||
|
||||
sub,<< %r25,%r23,%r0 ; compare r' with d; skip the next instruction when r' < d
|
||||
|
||||
addl %r25,%r1,%r25 ; r' -= d
|
||||
|
||||
stws %r25,0(0,%r26) ; store the remainder through rem_ptr
|
||||
|
||||
bv 0(%r2) ; return through %r2
|
||||
|
||||
addc %r0,%r28,%r28 ; (delay slot) q' += carry from the compare (set when r' >= d)
|
||||
|
||||
|
||||
; This is just a special case of the code above.
|
||||
|
||||
; We come here when d == 0xFFFFFFFF
|
||||
|
||||
L$FF.. add,uv %r25,%r24,%r24 ; r24 = n1 + n0; on unsigned overflow skip the compare
|
||||
|
||||
sub,<< %r24,%r23,%r0 ; compare the sum with d; skip the increment when it is smaller
|
||||
|
||||
ldo 1(%r24),%r24 ; remainder += 1
|
||||
|
||||
stws %r24,0(0,%r26) ; store the remainder through rem_ptr
|
||||
|
||||
bv 0(%r2) ; return through %r2
|
||||
|
||||
addc %r0,%r25,%r28 ; (delay slot) quotient = n1 + carry
|
||||
|
||||
|
||||
.exit
|
||||
|
||||
.procend
|
@ -1,7 +1,7 @@
|
||||
/* i80386 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
|
||||
sum in a third limb vector.
|
||||
|
||||
Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||
Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU MP Library.
|
||||
|
||||
@ -54,14 +54,18 @@ C_SYMBOL_NAME(__mpn_add_n:)
|
||||
subl %eax,%edx /* ... enter the loop */
|
||||
shrl $2,%eax /* restore previous value */
|
||||
#ifdef PIC
|
||||
call here
|
||||
here: leal (Loop - 3 - here)(%eax,%eax,8),%eax
|
||||
addl %eax,(%esp)
|
||||
ret
|
||||
/* Calculate start address in loop for PIC. Due to limitations in some
|
||||
assemblers, Loop-L0-3 cannot be put into the leal */
|
||||
call L0
|
||||
L0: leal (%eax,%eax,8),%eax
|
||||
addl (%esp),%eax
|
||||
addl $(Loop-L0-3),%eax
|
||||
addl $4,%esp
|
||||
#else
|
||||
leal (Loop - 3)(%eax,%eax,8),%eax /* calc start addr in loop */
|
||||
jmp *%eax /* jump into loop */
|
||||
/* Calculate start address in loop for non-PIC. */
|
||||
leal (Loop - 3)(%eax,%eax,8),%eax
|
||||
#endif
|
||||
jmp *%eax /* jump into loop */
|
||||
ALIGN (3)
|
||||
Loop: movl (%esi),%eax
|
||||
adcl (%edx),%eax
|
||||
|
28
sysdeps/i386/gmp-mparam.h
Normal file
28
sysdeps/i386/gmp-mparam.h
Normal file
@ -0,0 +1,28 @@
|
||||
/* gmp-mparam.h -- Compiler/machine parameter header file.
|
||||
|
||||
Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU MP Library.
|
||||
|
||||
The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Library General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public License
|
||||
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||
|
||||
#define BITS_PER_MP_LIMB 32 /* Bits in one multi-precision limb. */
|
||||
#define BYTES_PER_MP_LIMB 4 /* The same limb size in bytes (4 * 8 = 32). */
|
||||
#define BITS_PER_LONGINT 32 /* Presumably the width of `long' on this target -- confirm. */
|
||||
#define BITS_PER_INT 32 /* Presumably the width of `int' on this target -- confirm. */
|
||||
#define BITS_PER_SHORTINT 16 /* Presumably the width of `short' on this target -- confirm. */
|
||||
#define BITS_PER_CHAR 8 /* Presumably the width of `char' on this target -- confirm. */
|
||||
|
||||
#define IEEE_DOUBLE_BIG_ENDIAN 0 /* NOTE(review): 0 presumably selects the little-endian layout of IEEE doubles; confirm against the users of this macro. */
|
260
sysdeps/i386/i486/strcat.S
Normal file
260
sysdeps/i386/i486/strcat.S
Normal file
@ -0,0 +1,260 @@
|
||||
/* strcat(dest, src) -- Append SRC on the end of DEST.
|
||||
For Intel 80x86, x>=4.
|
||||
Copyright (C) 1994, 1995 Free Software Foundation, Inc.
|
||||
Contributed by Ulrich Drepper <drepper@ipd.info.uni-karlsruhe.de>.
|
||||
Optimised a little by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public
|
||||
License along with the GNU C Library; see the file COPYING.LIB. If
|
||||
not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include "asm-syntax.h"
|
||||
|
||||
/*
|
||||
INPUT PARAMETERS:
|
||||
dest (sp + 4)
|
||||
src (sp + 8)
|
||||
*/
|
||||
|
||||
.text
|
||||
/* strcat (dest, src)

   Append the NUL-terminated string SRC to the end of DEST and return
   DEST in %eax.  Once %edi has been pushed the arguments are found at
   8(%esp) (dest) and 12(%esp) (src).

   Register use:
     %ecx  source pointer
     %edx  destination pointer while the end of DEST is searched; from
	   L2 on it holds (end of DEST) - src, so that (%ecx,%edx)
	   addresses the destination byte belonging to the source byte
	   at (%ecx)
     %eax  word or byte currently examined or copied
     %edi  scratch for the NUL-detection arithmetic

   Both phases (finding the end of DEST, copying SRC) first step
   byte-wise until the pointer is 4-byte aligned and then handle one
   32-bit word at a time, unrolled four times.

   Byte-sized tests must name byte registers (%dl, %cl, %al): writing
   `andl %al, %al' or `testb $3, %edx' is either rejected by the
   assembler or, worse, assembled as a 32-bit operation that also looks
   at the stale upper bytes of the register.  */
ENTRY (strcat)
	pushl %edi		/* Save callee-safe register.  */

	movl 12(%esp), %ecx	/* load source pointer */
	movl 8(%esp), %edx	/* load destination pointer */

	testb $0xff, (%ecx)	/* Is source string empty? */
	jz L8			/* yes => return */

	/* Test the first bytes separately until destination is aligned.  */
	testb $3, %dl		/* destination pointer aligned? */
	jz L1			/* yes => begin scan loop */
	testb $0xff, (%edx)	/* is end of string? */
	jz L2			/* yes => start appending */
	incl %edx		/* increment destination pointer */

	testb $3, %dl		/* destination pointer aligned? */
	jz L1			/* yes => begin scan loop */
	testb $0xff, (%edx)	/* is end of string? */
	jz L2			/* yes => start appending */
	incl %edx		/* increment destination pointer */

	testb $3, %dl		/* destination pointer aligned? */
	jz L1			/* yes => begin scan loop */
	testb $0xff, (%edx)	/* is end of string? */
	jz L2			/* yes => start appending */
	incl %edx		/* increment destination pointer */

	/* Now we are aligned.  Begin scan loop.  */
	jmp L1

	ALIGN(4)

L4:	addl $16,%edx		/* increment destination pointer for round */

L1:	movl (%edx), %eax	/* get word (= 4 bytes) in question */
	movl $0xfefefeff, %edi	/* magic value */

	/* If you compare this with the algorithm in memchr.S you will
	   notice that here is an `xorl' statement missing.  But you must
	   not forget that we are looking for C == 0 and `xorl $0, %eax'
	   is a no-op.  */

	addl %eax, %edi		/* add the magic value to the word.  We get
				   carry bits reported for each byte which
				   is *not* 0 */

	/* According to the algorithm we had to reverse the effect of the
	   XOR first and then test the overflow bits.  But because the
	   following XOR would destroy the carry flag and it would (in a
	   representation with more than 32 bits) not alter the last
	   overflow, we can now test this condition.  If no carry is signaled
	   no overflow must have occurred in the last byte => it was 0.  */
	jnc L3

	/* We are only interested in carry bits that change due to the
	   previous add, so remove original bits */
	xorl %eax, %edi		/* (word+magic)^word */

	/* Now test for the other three overflow bits.  */
	orl $0xfefefeff, %edi	/* set all non-carry bits */
	incl %edi		/* add 1: if one carry bit was *not* set
				   the addition will not result in 0.  */

	/* If at least one byte of the word is NUL we don't get 0 in %edi.  */
	jnz L3

	movl 4(%edx), %eax	/* get next word from destination */
	movl $0xfefefeff, %edi	/* magic value */
	addl %eax, %edi		/* add the magic value to the word.  We get
				   carry bits reported for each byte which
				   is *not* 0 */
	jnc L5			/* highest byte is NUL => end of DEST found */
	xorl %eax, %edi		/* (word+magic)^word */
	orl $0xfefefeff, %edi	/* set all non-carry bits */
	incl %edi		/* add 1: if one carry bit was *not* set
				   the addition will not result in 0.  */
	jnz L5			/* one byte is NUL => end of DEST found */

	movl 8(%edx), %eax	/* get next word from destination */
	movl $0xfefefeff, %edi	/* magic value */
	addl %eax, %edi		/* add the magic value to the word.  We get
				   carry bits reported for each byte which
				   is *not* 0 */
	jnc L6			/* highest byte is NUL => end of DEST found */
	xorl %eax, %edi		/* (word+magic)^word */
	orl $0xfefefeff, %edi	/* set all non-carry bits */
	incl %edi		/* add 1: if one carry bit was *not* set
				   the addition will not result in 0.  */
	jnz L6			/* one byte is NUL => end of DEST found */

	movl 12(%edx), %eax	/* get next word from destination */
	movl $0xfefefeff, %edi	/* magic value */
	addl %eax, %edi		/* add the magic value to the word.  We get
				   carry bits reported for each byte which
				   is *not* 0 */
	jnc L7			/* highest byte is NUL => end of DEST found */
	xorl %eax, %edi		/* (word+magic)^word */
	orl $0xfefefeff, %edi	/* set all non-carry bits */
	incl %edi		/* add 1: if one carry bit was *not* set
				   the addition will not result in 0.  */
	jz L4			/* no byte is NUL => carry on scanning */

	/* Fall-through chain: make %edx point at the word (still in %eax)
	   that contains the NUL.  */
L7:	addl $4, %edx		/* adjust destination pointer */
L6:	addl $4, %edx
L5:	addl $4, %edx

	/* Locate the NUL inside the word.  If none of the first three
	   bytes is NUL it must be the fourth one.  */
L3:	testb %al, %al		/* is first byte NUL? */
	jz L2			/* yes => start copying */
	incl %edx		/* increment destination pointer */

	testb %ah, %ah		/* is second byte NUL? */
	jz L2			/* yes => start copying */
	incl %edx		/* increment destination pointer */

	testl $0xff0000, %eax	/* is third byte NUL? */
	jz L2			/* yes => start copying */
	incl %edx		/* increment destination pointer */

	/* %edx now addresses the terminating NUL of DEST.  Turn it into an
	   offset relative to the source pointer so that only %ecx has to be
	   advanced in the copy loop.  */
L2:	subl %ecx, %edx		/* reduce number of loop variants */

	/* Now we have to align the source pointer.  */
	testb $3, %cl		/* pointer correctly aligned? */
	jz L29			/* yes => start copy loop */
	movb (%ecx), %al	/* get first byte */
	movb %al, (%ecx,%edx)	/* and store it */
	andb %al, %al		/* is byte NUL? */
	jz L8			/* yes => return */
	incl %ecx		/* increment pointer */

	testb $3, %cl		/* pointer correctly aligned? */
	jz L29			/* yes => start copy loop */
	movb (%ecx), %al	/* get next byte */
	movb %al, (%ecx,%edx)	/* and store it */
	andb %al, %al		/* is byte NUL? */
	jz L8			/* yes => return */
	incl %ecx		/* increment pointer */

	testb $3, %cl		/* pointer correctly aligned? */
	jz L29			/* yes => start copy loop */
	movb (%ecx), %al	/* get next byte */
	movb %al, (%ecx,%edx)	/* and store it */
	andb %al, %al		/* is byte NUL? */
	jz L8			/* yes => return */
	incl %ecx		/* increment pointer */

	/* Now we are aligned.  */
	jmp L29			/* start copy loop */

	ALIGN(4)

L28:	movl %eax, 12(%ecx,%edx)/* store word at destination */
	addl $16, %ecx		/* adjust pointer for full round */

L29:	movl (%ecx), %eax	/* get word from source */
	movl $0xfefefeff, %edi	/* magic value */
	addl %eax, %edi		/* add the magic value to the word.  We get
				   carry bits reported for each byte which
				   is *not* 0 */
	jnc L9			/* highest byte is NUL => stop copying */
	xorl %eax, %edi		/* (word+magic)^word */
	orl $0xfefefeff, %edi	/* set all non-carry bits */
	incl %edi		/* add 1: if one carry bit was *not* set
				   the addition will not result in 0.  */
	jnz L9			/* one byte is NUL => stop copying */
	movl %eax, (%ecx,%edx)	/* store word to destination */

	movl 4(%ecx), %eax	/* get word from source */
	movl $0xfefefeff, %edi	/* magic value */
	addl %eax, %edi		/* add the magic value to the word.  We get
				   carry bits reported for each byte which
				   is *not* 0 */
	jnc L91			/* highest byte is NUL => stop copying */
	xorl %eax, %edi		/* (word+magic)^word */
	orl $0xfefefeff, %edi	/* set all non-carry bits */
	incl %edi		/* add 1: if one carry bit was *not* set
				   the addition will not result in 0.  */
	jnz L91			/* one byte is NUL => stop copying */
	movl %eax, 4(%ecx,%edx)	/* store word to destination */

	movl 8(%ecx), %eax	/* get word from source */
	movl $0xfefefeff, %edi	/* magic value */
	addl %eax, %edi		/* add the magic value to the word.  We get
				   carry bits reported for each byte which
				   is *not* 0 */
	jnc L92			/* highest byte is NUL => stop copying */
	xorl %eax, %edi		/* (word+magic)^word */
	orl $0xfefefeff, %edi	/* set all non-carry bits */
	incl %edi		/* add 1: if one carry bit was *not* set
				   the addition will not result in 0.  */
	jnz L92			/* one byte is NUL => stop copying */
	movl %eax, 8(%ecx,%edx)	/* store word to destination */

	movl 12(%ecx), %eax	/* get word from source */
	movl $0xfefefeff, %edi	/* magic value */
	addl %eax, %edi		/* add the magic value to the word.  We get
				   carry bits reported for each byte which
				   is *not* 0 */
	jnc L93			/* highest byte is NUL => stop copying */
	xorl %eax, %edi		/* (word+magic)^word */
	orl $0xfefefeff, %edi	/* set all non-carry bits */
	incl %edi		/* add 1: if one carry bit was *not* set
				   the addition will not result in 0.  */
	jz L28			/* no byte is NUL => carry on copying */

	/* Fall-through chain: make %ecx point at the source word (still in
	   %eax) that contains the NUL.  */
L93:	addl $4, %ecx		/* adjust pointer */
L92:	addl $4, %ecx
L91:	addl $4, %ecx

	/* Copy the last word byte by byte, up to and including the NUL.  */
L9:	movb %al, (%ecx,%edx)	/* store first byte of last word */
	orb %al, %al		/* is it NUL? */
	jz L8			/* yes => return */

	movb %ah, 1(%ecx,%edx)	/* store second byte of last word */
	orb %ah, %ah		/* is it NUL? */
	jz L8			/* yes => return */

	shrl $16, %eax		/* make upper bytes accessible */
	movb %al, 2(%ecx,%edx)	/* store third byte of last word */
	orb %al, %al		/* is it NUL? */
	jz L8			/* yes => return */

	movb %ah, 3(%ecx,%edx)	/* store fourth byte of last word */

L8:	movl 8(%esp), %eax	/* start address of destination is result */
	popl %edi		/* restore saved register */

	ret
|
132
sysdeps/i386/i486/strlen.S
Normal file
132
sysdeps/i386/i486/strlen.S
Normal file
@ -0,0 +1,132 @@
|
||||
/* strlen(str) -- determine the length of the string STR.
|
||||
Optimized for Intel 80x86, x>=4.
|
||||
Copyright (C) 1991, 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
|
||||
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public
|
||||
License along with the GNU C Library; see the file COPYING.LIB. If
|
||||
not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include "asm-syntax.h"
|
||||
|
||||
/*
|
||||
INPUT PARAMETERS:
|
||||
str (sp + 4)
|
||||
*/
|
||||
|
||||
.text
|
||||
/* strlen (str) -- STR is read from 4(%esp); the length is returned in %eax.

   %eax walks along the string and the result is formed at L2 as %eax
   minus the start address.  %ecx and %edx are scratch registers.  Up to
   three leading bytes are checked one at a time until %eax is 4-byte
   aligned; after that the loop at L1 inspects four 32-bit words per
   round using the 0xfefefeff carry trick described at each step.  */
ENTRY (strlen)
|
||||
movl 4(%esp), %ecx /* get string pointer */
|
||||
movl %ecx, %eax /* duplicate it */
|
||||
|
||||
andl $3, %ecx /* mask alignment bits */
|
||||
jz L1 /* aligned => start loop */
|
||||
/* %ecx is at most 3 from here on, so %ch is a zero byte to compare with.  */
cmpb %ch, (%eax) /* is byte NUL? */
|
||||
je L2 /* yes => return */
|
||||
incl %eax /* increment pointer */
|
||||
|
||||
xorl $3, %ecx /* was alignment = 3? */
|
||||
jz L1 /* yes => now it is aligned and start loop */
|
||||
cmpb %ch, (%eax) /* is byte NUL? */
|
||||
je L2 /* yes => return */
|
||||
addl $1, %eax /* increment pointer */
|
||||
|
||||
subl $1, %ecx /* was alignment = 2? */
|
||||
jz L1 /* yes => now it is aligned and start loop */
|
||||
cmpb %ch, (%eax) /* is byte NUL? */
|
||||
je L2 /* yes => return */
|
||||
|
||||
/* Don't change the above `addl $1,%eax' and `subl $1, %ecx' into `incl %eax'
|
||||
and `decl %ecx' resp. The additional two bytes per instruction make the
|
||||
label L4 to be aligned on a 16 byte boundary with nops.
|
||||
|
||||
The following `subl $15, %eax' is part of this trick, too. Together with
|
||||
the next instruction (`addl $16, %eax') it is in fact a `incl %eax', just
|
||||
as expected from the algorithm. But doing so has the advantage that
|
||||
no jump to label L1 is necessary and so the pipeline is not flushed. */
|
||||
|
||||
subl $15, %eax /* effectively +1 */
|
||||
|
||||
|
||||
L4: addl $16, %eax /* adjust pointer for full loop */
|
||||
|
||||
L1: movl (%eax), %ecx /* get word (= 4 bytes) in question */
|
||||
movl $0xfefefeff, %edx /* magic value */
|
||||
addl %ecx, %edx /* add the magic value to the word. We get
|
||||
carry bits reported for each byte which
|
||||
is *not* 0 */
|
||||
jnc L3 /* highest byte is NUL => return pointer */
|
||||
xorl %ecx, %edx /* (word+magic)^word */
|
||||
orl $0xfefefeff, %edx /* set all non-carry bits */
|
||||
incl %edx /* add 1: if one carry bit was *not* set
|
||||
the addition will not result in 0. */
|
||||
jnz L3 /* found NUL => return pointer */
|
||||
|
||||
movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
|
||||
movl $0xfefefeff, %edx /* magic value */
|
||||
addl %ecx, %edx /* add the magic value to the word. We get
|
||||
carry bits reported for each byte which
|
||||
is *not* 0 */
|
||||
jnc L5 /* highest byte is NUL => return pointer */
|
||||
xorl %ecx, %edx /* (word+magic)^word */
|
||||
orl $0xfefefeff, %edx /* set all non-carry bits */
|
||||
incl %edx /* add 1: if one carry bit was *not* set
|
||||
the addition will not result in 0. */
|
||||
jnz L5 /* found NUL => return pointer */
|
||||
|
||||
movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
|
||||
movl $0xfefefeff, %edx /* magic value */
|
||||
addl %ecx, %edx /* add the magic value to the word. We get
|
||||
carry bits reported for each byte which
|
||||
is *not* 0 */
|
||||
jnc L6 /* highest byte is NUL => return pointer */
|
||||
xorl %ecx, %edx /* (word+magic)^word */
|
||||
orl $0xfefefeff, %edx /* set all non-carry bits */
|
||||
incl %edx /* add 1: if one carry bit was *not* set
|
||||
the addition will not result in 0. */
|
||||
jnz L6 /* found NUL => return pointer */
|
||||
|
||||
movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
|
||||
movl $0xfefefeff, %edx /* magic value */
|
||||
addl %ecx, %edx /* add the magic value to the word. We get
|
||||
carry bits reported for each byte which
|
||||
is *not* 0 */
|
||||
jnc L7 /* highest byte is NUL => return pointer */
|
||||
xorl %ecx, %edx /* (word+magic)^word */
|
||||
orl $0xfefefeff, %edx /* set all non-carry bits */
|
||||
incl %edx /* add 1: if one carry bit was *not* set
|
||||
the addition will not result in 0. */
|
||||
jz L4 /* no NUL found => continue loop */
|
||||
|
||||
/* Fall-through chain: advance %eax to the word (still held in %ecx) in
   which the NUL was detected.  */
L7: addl $4, %eax /* adjust pointer */
|
||||
L6: addl $4, %eax
|
||||
L5: addl $4, %eax
|
||||
|
||||
/* Find the NUL inside the word: if none of the first three bytes is NUL,
   %eax ends up on the fourth byte.  */
L3: testb %cl, %cl /* is first byte NUL? */
|
||||
jz L2 /* yes => return */
|
||||
incl %eax /* increment pointer */
|
||||
|
||||
testb %ch, %ch /* is second byte NUL? */
|
||||
jz L2 /* yes => return */
|
||||
incl %eax /* increment pointer */
|
||||
|
||||
testl $0xff0000, %ecx /* is third byte NUL? */
|
||||
jz L2 /* yes => return pointer */
|
||||
incl %eax /* increment pointer */
|
||||
|
||||
L2: subl 4(%esp), %eax /* compute difference to string start */
|
||||
|
||||
ret
|
2
sysdeps/i386/i586/Implies
Normal file
2
sysdeps/i386/i586/Implies
Normal file
@ -0,0 +1,2 @@
|
||||
# Code optimized for i486 is better than simple i386 code.
|
||||
i386/i486
|
136
sysdeps/i386/i586/add_n.S
Normal file
136
sysdeps/i386/i586/add_n.S
Normal file
@ -0,0 +1,136 @@
|
||||
/* Pentium __mpn_add_n -- Add two limb vectors of the same length > 0 and store
|
||||
sum in a third limb vector.
|
||||
|
||||
Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU MP Library.
|
||||
|
||||
The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Library General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public License
|
||||
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||
|
||||
/*
|
||||
INPUT PARAMETERS
|
||||
res_ptr (sp + 4)
|
||||
s1_ptr (sp + 8)
|
||||
s2_ptr (sp + 12)
|
||||
size (sp + 16)
|
||||
*/
|
||||
|
||||
#define r1 %eax
|
||||
#define r2 %edx
|
||||
#define src1 %esi
|
||||
#define src2 %ebp
|
||||
#define dst %edi
|
||||
#define x %ebx
|
||||
|
||||
#include "sysdep.h"
|
||||
#include "asm-syntax.h"
|
||||
|
||||
.text
|
||||
ALIGN (3)
|
||||
/* __mpn_add_n (res_ptr, s1_ptr, s2_ptr, size)

   Add the limb vectors at s1_ptr and s2_ptr (SIZE limbs each, SIZE > 0
   per the file header) and store the sum at res_ptr.  %eax returns the
   carry out of the most significant limb (0 or 1).

   x (%ebx) always holds the s2 limb that the next `adcl' consumes; it is
   loaded one limb ahead.  The carry flag is live across the whole
   routine, which is why pointers are advanced with `leal' and counters
   with `decl'/`incl' -- none of these modify the carry flag.  */
.globl C_SYMBOL_NAME(__mpn_add_n)
|
||||
C_SYMBOL_NAME(__mpn_add_n:)
|
||||
pushl %edi
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
pushl %ebp
|
||||
|
||||
/* Four registers were pushed, so the first argument is at 20(%esp).  */
movl 20(%esp),dst /* res_ptr */
|
||||
movl 24(%esp),src1 /* s1_ptr */
|
||||
movl 28(%esp),src2 /* s2_ptr */
|
||||
movl 32(%esp),%ecx /* size */
|
||||
|
||||
movl (src2),x /* preload first s2 limb */
|
||||
|
||||
/* Split size - 1 into %ecx = number of 8-limb rounds for Loop and
   r2 = number of single limbs for Loop2.  The one remaining limb is
   added at Lend2.  */
decl %ecx
|
||||
movl %ecx,r2
|
||||
shrl $3,%ecx
|
||||
andl $7,r2
|
||||
testl %ecx,%ecx /* zero carry flag */
|
||||
jz Lend
|
||||
pushl r2 /* r2 is needed as a temporary inside Loop */
|
||||
|
||||
ALIGN (3)
|
||||
Loop: movl 28(dst),%eax /* fetch destination cache line */
|
||||
leal 32(dst),dst
|
||||
|
||||
/* Limbs 0 and 1 of this round; dst was already advanced by 32, hence the
   negative store offsets.  */
L1: movl (src1),r1
|
||||
movl 4(src1),r2
|
||||
adcl x,r1
|
||||
movl 4(src2),x
|
||||
adcl x,r2
|
||||
movl 8(src2),x
|
||||
movl r1,-32(dst)
|
||||
movl r2,-28(dst)
|
||||
|
||||
/* Limbs 2 and 3.  */
L2: movl 8(src1),r1
|
||||
movl 12(src1),r2
|
||||
adcl x,r1
|
||||
movl 12(src2),x
|
||||
adcl x,r2
|
||||
movl 16(src2),x
|
||||
movl r1,-24(dst)
|
||||
movl r2,-20(dst)
|
||||
|
||||
/* Limbs 4 and 5.  */
L3: movl 16(src1),r1
|
||||
movl 20(src1),r2
|
||||
adcl x,r1
|
||||
movl 20(src2),x
|
||||
adcl x,r2
|
||||
movl 24(src2),x
|
||||
movl r1,-16(dst)
|
||||
movl r2,-12(dst)
|
||||
|
||||
/* Limbs 6 and 7; x is reloaded with the first s2 limb of the next round.  */
L4: movl 24(src1),r1
|
||||
movl 28(src1),r2
|
||||
adcl x,r1
|
||||
movl 28(src2),x
|
||||
adcl x,r2
|
||||
movl 32(src2),x
|
||||
movl r1,-8(dst)
|
||||
movl r2,-4(dst)
|
||||
|
||||
leal 32(src1),src1
|
||||
leal 32(src2),src2
|
||||
decl %ecx
|
||||
jnz Loop
|
||||
|
||||
popl r2 /* restore the single-limb count */
|
||||
Lend:
|
||||
decl r2 /* test r2 w/o clobbering carry */
|
||||
js Lend2
|
||||
incl r2
|
||||
/* One limb per iteration for the r2 limbs left over.  */
Loop2:
|
||||
leal 4(dst),dst
|
||||
movl (src1),r1
|
||||
adcl x,r1
|
||||
movl 4(src2),x
|
||||
movl r1,-4(dst)
|
||||
leal 4(src1),src1
|
||||
leal 4(src2),src2
|
||||
decl r2
|
||||
jnz Loop2
|
||||
/* Final limb: its s2 operand is already in x.  */
Lend2:
|
||||
movl (src1),r1
|
||||
adcl x,r1
|
||||
movl r1,(dst)
|
||||
|
||||
/* Turn the carry flag into the return value: 0 - CF, then negated.  */
sbbl %eax,%eax
|
||||
negl %eax
|
||||
|
||||
popl %ebp
|
||||
popl %ebx
|
||||
popl %esi
|
||||
popl %edi
|
||||
ret
|
84
sysdeps/i386/i586/addmul_1.S
Normal file
84
sysdeps/i386/i586/addmul_1.S
Normal file
@ -0,0 +1,84 @@
|
||||
/* Pentium __mpn_addmul_1 -- Multiply a limb vector with a limb and add
|
||||
the result to a second limb vector.
|
||||
|
||||
Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU MP Library.
|
||||
|
||||
The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Library General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public License
|
||||
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||
|
||||
/*
|
||||
INPUT PARAMETERS
|
||||
res_ptr (sp + 4)
|
||||
s1_ptr (sp + 8)
|
||||
size (sp + 12)
|
||||
s2_limb (sp + 16)
|
||||
*/
|
||||
|
||||
#include "sysdep.h"
|
||||
#include "asm-syntax.h"
|
||||
|
||||
#define res_ptr edi
|
||||
#define s1_ptr esi
|
||||
#define s2_limb ebp
|
||||
|
||||
TEXT
|
||||
ALIGN (3)
|
||||
/* __mpn_addmul_1 (res_ptr, s1_ptr, size, s2_limb)

   Per the file header: multiply the limb vector at s1_ptr by s2_limb and
   add the result to the limb vector at res_ptr.

   NOTE(review): INSN1, INSN2, R, MEM_DISP and MEM_INDEX come from
   asm-syntax.h, which is not visible here.  The notes below assume that
   INSN2 takes the destination operand first -- confirm against that
   header.  */
GLOBL C_SYMBOL_NAME(__mpn_addmul_1)
|
||||
.type C_SYMBOL_NAME(__mpn_addmul_1),@function
|
||||
C_SYMBOL_NAME(__mpn_addmul_1:)
|
||||
|
||||
INSN1(push,l ,R(edi))
|
||||
INSN1(push,l ,R(esi))
|
||||
INSN1(push,l ,R(ebx))
|
||||
INSN1(push,l ,R(ebp))
|
||||
|
||||
/* Four registers were pushed, so the arguments start at esp + 20.  */
INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20))
|
||||
INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24))
|
||||
INSN2(mov,l ,R(ecx),MEM_DISP(esp,28))
|
||||
INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32))
|
||||
|
||||
/* Presumably: point both pointers just past their last limb and negate the
   count, so that one index register counts upwards and the loop ends when
   it reaches zero.  edx (cleared here) carries the high product limb from
   one iteration to the next.  */
INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,ecx,4))
|
||||
INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,ecx,4))
|
||||
INSN1(neg,l ,R(ecx))
|
||||
INSN2(xor,l ,R(edx),R(edx))
|
||||
ALIGN (3)
|
||||
Loop:
|
||||
/* ebx = carry limb of the previous iteration, eax = next s1 limb.  */
INSN2(mov,l ,R(ebx),R(edx))
|
||||
INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,ecx,4))
|
||||
|
||||
/* Multiply by s2_limb; the instruction's implicit operands are not spelled
   out in this macro form.  */
INSN1(mul,l ,R(s2_limb))
|
||||
|
||||
/* Add the previous carry limb and then the current res limb to the low
   product half; each carry out is folded into edx.  */
INSN2(add,l ,R(eax),R(ebx))
|
||||
INSN2(mov,l ,R(ebx),MEM_INDEX(res_ptr,ecx,4))
|
||||
|
||||
INSN2(adc,l ,R(edx),$0)
|
||||
INSN2(add,l ,R(ebx),R(eax))
|
||||
|
||||
INSN2(adc,l ,R(edx),$0)
|
||||
INSN2(mov,l ,MEM_INDEX(res_ptr,ecx,4),R(ebx))
|
||||
|
||||
INSN1(inc,l ,R(ecx))
|
||||
INSN1(jnz, ,Loop)
|
||||
|
||||
|
||||
/* The last carry limb is handed back in eax.  */
INSN2(mov,l ,R(eax),R(edx))
|
||||
INSN1(pop,l ,R(ebp))
|
||||
INSN1(pop,l ,R(ebx))
|
||||
INSN1(pop,l ,R(esi))
|
||||
INSN1(pop,l ,R(edi))
|
||||
ret
|
||||
/* Lfe1 marks the end of the function for the .size directive below.  */
Lfe1:
|
||||
.size C_SYMBOL_NAME(__mpn_addmul_1),Lfe1-C_SYMBOL_NAME(__mpn_addmul_1)
|
213
sysdeps/i386/i586/lshift.S
Normal file
213
sysdeps/i386/i586/lshift.S
Normal file
@ -0,0 +1,213 @@
|
||||
/* Pentium optimized __mpn_lshift --
|
||||
|
||||
Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU MP Library.
|
||||
|
||||
The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Library General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public License
|
||||
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||
|
||||
/*
|
||||
INPUT PARAMETERS
|
||||
res_ptr (sp + 4)
|
||||
s_ptr (sp + 8)
|
||||
size (sp + 12)
|
||||
cnt (sp + 16)
|
||||
*/
|
||||
|
||||
#include "sysdep.h"
|
||||
#include "asm-syntax.h"
|
||||
|
||||
.text
|
||||
ALIGN (3)
|
||||
/* __mpn_lshift (res_ptr, s_ptr, size, cnt)

   Shift the SIZE-limb vector at S_PTR left by CNT bits, store the
   result at RES_PTR and return in %eax the bits shifted out of the
   most significant limb.  After the four pushes below the arguments
   are found at 20/24/28/32(%esp).

   Two code paths:

     Lnormal   handles any CNT with `shldl', working from the most
	       significant limb downwards.

     Lspecial  handles CNT == 1 as an add-with-carry chain, working
	       from the least significant limb upwards.  Going upwards
	       is only correct if no source limb is overwritten before
	       it has been read, i.e. if either
		 s_ptr + 1 limb >= res_ptr        (the loop always reads
						   one limb ahead of the
						   limb it stores), or
		 res_ptr >= s_ptr + size limbs    (no overlap at all).
	       Every other shift by one falls back to Lnormal.  */
	.globl C_SYMBOL_NAME(__mpn_lshift)
C_SYMBOL_NAME(__mpn_lshift:)
	pushl	%edi
	pushl	%esi
	pushl	%ebx
	pushl	%ebp

	movl	20(%esp),%edi		/* res_ptr */
	movl	24(%esp),%esi		/* s_ptr */
	movl	28(%esp),%ebp		/* size */
	movl	32(%esp),%ecx		/* cnt */

	/* We can use faster code for shift-by-1 under certain conditions.
	   Both comparisons are done on byte addresses; SIZE is scaled by
	   the limb size (4) before it is compared.  */
	cmp	$1,%ecx
	jne	Lnormal
	leal	4(%esi),%eax
	cmpl	%edi,%eax
	jnc	Lspecial		/* jump if s_ptr + 1 >= res_ptr */
	leal	(%esi,%ebp,4),%eax
	cmpl	%eax,%edi
	jnc	Lspecial		/* jump if res_ptr >= s_ptr + size */

Lnormal:
	leal	-4(%edi,%ebp,4),%edi	/* point at most significant limbs */
	leal	-4(%esi,%ebp,4),%esi

	movl	(%esi),%edx
	subl	$4,%esi
	xorl	%eax,%eax
	shldl	%cl,%edx,%eax		/* compute carry limb */
	pushl	%eax			/* push carry limb onto stack */

	decl	%ebp
	pushl	%ebp			/* keep size - 1 for the tail loop */
	shrl	$3,%ebp			/* number of 8-limb rounds */
	jz	Lend

	movl	(%edi),%eax		/* fetch destination cache line */

	ALIGN	(2)
Loop:	movl	-28(%edi),%eax		/* fetch destination cache line */
	movl	%edx,%ebx

	movl	(%esi),%eax
	movl	-4(%esi),%edx
	shldl	%cl,%eax,%ebx
	shldl	%cl,%edx,%eax
	movl	%ebx,(%edi)
	movl	%eax,-4(%edi)

	movl	-8(%esi),%ebx
	movl	-12(%esi),%eax
	shldl	%cl,%ebx,%edx
	shldl	%cl,%eax,%ebx
	movl	%edx,-8(%edi)
	movl	%ebx,-12(%edi)

	movl	-16(%esi),%edx
	movl	-20(%esi),%ebx
	shldl	%cl,%edx,%eax
	shldl	%cl,%ebx,%edx
	movl	%eax,-16(%edi)
	movl	%edx,-20(%edi)

	movl	-24(%esi),%eax
	movl	-28(%esi),%edx
	shldl	%cl,%eax,%ebx
	shldl	%cl,%edx,%eax
	movl	%ebx,-24(%edi)
	movl	%eax,-28(%edi)

	subl	$32,%esi
	subl	$32,%edi
	decl	%ebp
	jnz	Loop

Lend:	popl	%ebp
	andl	$7,%ebp			/* limbs left for the one-limb loop */
	jz	Lend2
Loop2:	movl	(%esi),%eax
	shldl	%cl,%eax,%edx
	movl	%edx,(%edi)
	movl	%eax,%edx
	subl	$4,%esi
	subl	$4,%edi
	decl	%ebp
	jnz	Loop2

Lend2:	shll	%cl,%edx		/* compute least significant limb */
	movl	%edx,(%edi)		/* store it */

	popl	%eax			/* pop carry limb */

	popl	%ebp
	popl	%ebx
	popl	%esi
	popl	%edi
	ret

/* Shift by one bit.  We loop from the least significant end of the
   arrays here.  That is only permissible when no source limb can be
   overwritten before it has been read; the checks at function entry
   guarantee this.  The function is documented to work for overlapping
   source and destination, so all remaining cases use Lnormal.

   The shift is done as `adcl reg,reg': each limb is doubled and the bit
   that fell out of the previous limb comes in through the carry flag.
   The carry flag is therefore live across the loops; pointers are
   advanced with `leal' and counters with `decl'/`incl', which leave it
   untouched.  */

Lspecial:
	movl	(%esi),%edx
	addl	$4,%esi

	decl	%ebp
	pushl	%ebp			/* keep size - 1 for the tail loop */
	shrl	$3,%ebp			/* number of 8-limb rounds */

	addl	%edx,%edx		/* shift lowest limb, sets carry */
	incl	%ebp			/* test %ebp for zero ... */
	decl	%ebp			/* ... w/o clobbering carry */
	jz	LLend

	movl	(%edi),%eax		/* fetch destination cache line */

	ALIGN	(2)
LLoop:	movl	28(%edi),%eax		/* fetch destination cache line */
	movl	%edx,%ebx

	movl	(%esi),%eax
	movl	4(%esi),%edx
	adcl	%eax,%eax
	movl	%ebx,(%edi)
	adcl	%edx,%edx
	movl	%eax,4(%edi)

	movl	8(%esi),%ebx
	movl	12(%esi),%eax
	adcl	%ebx,%ebx
	movl	%edx,8(%edi)
	adcl	%eax,%eax
	movl	%ebx,12(%edi)

	movl	16(%esi),%edx
	movl	20(%esi),%ebx
	adcl	%edx,%edx
	movl	%eax,16(%edi)
	adcl	%ebx,%ebx
	movl	%edx,20(%edi)

	movl	24(%esi),%eax
	movl	28(%esi),%edx
	adcl	%eax,%eax
	movl	%ebx,24(%edi)
	adcl	%edx,%edx
	movl	%eax,28(%edi)

	leal	32(%esi),%esi		/* use leal not to clobber carry */
	leal	32(%edi),%edi
	decl	%ebp
	jnz	LLoop

LLend:	popl	%ebp
	sbbl	%eax,%eax		/* save carry in %eax */
	andl	$7,%ebp			/* limbs left for the one-limb loop */
	jz	LLend2
	addl	%eax,%eax		/* restore carry from eax */
LLoop2:	movl	%edx,%ebx
	movl	(%esi),%edx
	adcl	%edx,%edx
	movl	%ebx,(%edi)

	leal	4(%esi),%esi		/* use leal not to clobber carry */
	leal	4(%edi),%edi
	decl	%ebp
	jnz	LLoop2

	jmp	LL1
LLend2:	addl	%eax,%eax		/* restore carry from eax */
LL1:	movl	%edx,(%edi)		/* store last limb */

	sbbl	%eax,%eax		/* return the bit shifted out ... */
	negl	%eax			/* ... as 0 or 1 */

	popl	%ebp
	popl	%ebx
	popl	%esi
	popl	%edi
	ret
|
@ -1,5 +1,5 @@
|
||||
/* memcopy.h -- definitions for memory copy functions. Pentium version.
|
||||
Copyright (C) 1994 Free Software Foundation, Inc.
|
||||
Copyright (C) 1994, 1995 Free Software Foundation, Inc.
|
||||
Contributed by Torbjorn Granlund (tege@sics.se).
|
||||
|
||||
This file is part of the GNU C Library.
|
||||
@ -88,7 +88,7 @@ Cambridge, MA 02139, USA. */
|
||||
"subl $32,%2\n" \
|
||||
"jns 1b\n" \
|
||||
"2: addl $32,%2" : \
|
||||
"=r" (dst_bp), "=r" (src_bp), "=r" (nbytes_left) : \
|
||||
"0" (dst_bp), "1" (src_bp), "2" (nbytes) : \
|
||||
"=r" (dst_ep), "=r" (src_ep), "=r" (nbytes_left) : \
|
||||
"0" (dst_ep), "1" (src_ep), "2" (nbytes) : \
|
||||
"ax", "dx"); \
|
||||
} while (0)
|
||||
|
78
sysdeps/i386/i586/mul_1.S
Normal file
78
sysdeps/i386/i586/mul_1.S
Normal file
@ -0,0 +1,78 @@
|
||||
/* Pentium __mpn_mul_1 -- Multiply a limb vector with a limb and store
|
||||
the result in a second limb vector.
|
||||
|
||||
Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU MP Library.
|
||||
|
||||
The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Library General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public License
|
||||
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||
|
||||
/*
|
||||
INPUT PARAMETERS
|
||||
res_ptr (sp + 4)
|
||||
s1_ptr (sp + 8)
|
||||
size (sp + 12)
|
||||
s2_limb (sp + 16)
|
||||
*/
|
||||
|
||||
#include "sysdep.h"
|
||||
#include "asm-syntax.h"
|
||||
|
||||
#define res_ptr edi
|
||||
#define s1_ptr esi
|
||||
#define size ecx
|
||||
#define s2_limb ebp
|
||||
|
||||
TEXT
|
||||
ALIGN (3)
|
||||
/* __mpn_mul_1 (res_ptr, s1_ptr, size, s2_limb)

   Per the file header: multiply the limb vector at s1_ptr by s2_limb and
   store the result in the limb vector at res_ptr.

   NOTE(review): INSN1, INSN2, R, MEM_DISP and MEM_INDEX come from
   asm-syntax.h, which is not visible here.  The notes below assume that
   INSN2 takes the destination operand first -- confirm against that
   header.  */
GLOBL C_SYMBOL_NAME(__mpn_mul_1)
|
||||
C_SYMBOL_NAME(__mpn_mul_1:)
|
||||
|
||||
INSN1(push,l ,R(edi))
|
||||
INSN1(push,l ,R(esi))
|
||||
INSN1(push,l ,R(ebx))
|
||||
INSN1(push,l ,R(ebp))
|
||||
|
||||
/* Four registers were pushed, so the arguments start at esp + 20.  */
INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20))
|
||||
INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24))
|
||||
INSN2(mov,l ,R(size),MEM_DISP(esp,28))
|
||||
INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32))
|
||||
|
||||
/* Presumably: point both pointers just past their last limb and negate the
   count, so that one index register counts upwards and the loop ends when
   it reaches zero.  edx (cleared here) carries the high product limb from
   one iteration to the next.  */
INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4))
|
||||
INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
|
||||
INSN1(neg,l ,R(size))
|
||||
INSN2(xor,l ,R(edx),R(edx))
|
||||
ALIGN (3)
|
||||
Loop:
|
||||
/* ebx = carry limb of the previous iteration, eax = next s1 limb.  */
INSN2(mov,l ,R(ebx),R(edx))
|
||||
INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4))
|
||||
|
||||
/* Multiply by s2_limb; the instruction's implicit operands are not spelled
   out in this macro form.  */
INSN1(mul,l ,R(s2_limb))
|
||||
|
||||
/* Add the previous carry limb to the low product half; the carry out is
   folded into edx before the result limb is stored.  */
INSN2(add,l ,R(eax),R(ebx))
|
||||
|
||||
INSN2(adc,l ,R(edx),$0)
|
||||
INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(eax))
|
||||
|
||||
INSN1(inc,l ,R(size))
|
||||
INSN1(jnz, ,Loop)
|
||||
|
||||
|
||||
/* The last carry limb is handed back in eax.  */
INSN2(mov,l ,R(eax),R(edx))
|
||||
INSN1(pop,l ,R(ebp))
|
||||
INSN1(pop,l ,R(ebx))
|
||||
INSN1(pop,l ,R(esi))
|
||||
INSN1(pop,l ,R(edi))
|
||||
ret
|
213
sysdeps/i386/i586/rshift.S
Normal file
213
sysdeps/i386/i586/rshift.S
Normal file
@ -0,0 +1,213 @@
|
||||
/* Pentium optimized __mpn_rshift --
|
||||
|
||||
Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU MP Library.
|
||||
|
||||
The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Library General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public License
|
||||
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||
|
||||
/*
|
||||
INPUT PARAMETERS
|
||||
res_ptr (sp + 4)
|
||||
s_ptr (sp + 8)
|
||||
size (sp + 12)
|
||||
cnt (sp + 16)
|
||||
*/
|
||||
|
||||
#include "sysdep.h"
|
||||
#include "asm-syntax.h"
|
||||
|
||||
.text
|
||||
ALIGN (3)
|
||||
.globl C_SYMBOL_NAME(__mpn_rshift)
|
||||
C_SYMBOL_NAME(__mpn_rshift:)
|
||||
pushl %edi /* save the four registers overwritten below; restored before ret */
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
pushl %ebp
|
||||
|
||||
movl 20(%esp),%edi /* res_ptr */
|
||||
movl 24(%esp),%esi /* s_ptr */
|
||||
movl 28(%esp),%ebp /* size */
|
||||
movl 32(%esp),%ecx /* cnt */
|
||||
|
||||
cmp $1,%ecx /* the rcrl-based code at Lspecial shifts by exactly one bit */
|
||||
jne Lnormal
|
||||
movl %edi,%eax
|
||||
subl %esi,%eax /* %eax = res_ptr - s_ptr, a byte difference */
|
||||
cmpl %ebp,%eax /* NOTE(review): byte difference compared with a limb count -- confirm intended */
|
||||
jnc Lspecial /* taken when (res_ptr - s_ptr) >= size as unsigned values */
|
||||
|
||||
Lnormal:
|
||||
movl (%esi),%edx /* %edx = least significant source limb */
|
||||
addl $4,%esi
|
||||
xorl %eax,%eax
|
||||
shrdl %cl,%edx,%eax /* compute carry limb */
|
||||
pushl %eax /* push carry limb onto stack */
|
||||
|
||||
decl %ebp /* size-1 limbs go through the loops; the top limb is finished at Lend2 */
|
||||
pushl %ebp /* keep size-1 for the remainder computation at Lend */
|
||||
shrl $3,%ebp /* number of 8-limb groups */
|
||||
jz Lend
|
||||
|
||||
movl (%edi),%eax /* fetch destination cache line */
|
||||
|
||||
ALIGN (2)
|
||||
Loop: movl 28(%edi),%eax /* fetch destination cache line */
|
||||
movl %edx,%ebx /* %ebx = limb carried over from the previous step */
|
||||
|
||||
movl (%esi),%eax
|
||||
movl 4(%esi),%edx
|
||||
shrdl %cl,%eax,%ebx /* each shrdl fills the vacated high bits from the next higher limb */
|
||||
shrdl %cl,%edx,%eax
|
||||
movl %ebx,(%edi)
|
||||
movl %eax,4(%edi)
|
||||
|
||||
movl 8(%esi),%ebx
|
||||
movl 12(%esi),%eax
|
||||
shrdl %cl,%ebx,%edx
|
||||
shrdl %cl,%eax,%ebx
|
||||
movl %edx,8(%edi)
|
||||
movl %ebx,12(%edi)
|
||||
|
||||
movl 16(%esi),%edx
|
||||
movl 20(%esi),%ebx
|
||||
shrdl %cl,%edx,%eax
|
||||
shrdl %cl,%ebx,%edx
|
||||
movl %eax,16(%edi)
|
||||
movl %edx,20(%edi)
|
||||
|
||||
movl 24(%esi),%eax
|
||||
movl 28(%esi),%edx /* %edx is again the not-yet-stored limb for the next step */
|
||||
shrdl %cl,%eax,%ebx
|
||||
shrdl %cl,%edx,%eax
|
||||
movl %ebx,24(%edi)
|
||||
movl %eax,28(%edi)
|
||||
|
||||
addl $32,%esi
|
||||
addl $32,%edi
|
||||
decl %ebp
|
||||
jnz Loop
|
||||
|
||||
Lend: popl %ebp /* size-1 again */
|
||||
andl $7,%ebp /* limbs left over after the 8-limb groups */
|
||||
jz Lend2
|
||||
Loop2: movl (%esi),%eax
|
||||
shrdl %cl,%eax,%edx /* compute result limb */
|
||||
movl %edx,(%edi)
|
||||
movl %eax,%edx
|
||||
addl $4,%esi
|
||||
addl $4,%edi
|
||||
decl %ebp
|
||||
jnz Loop2
|
||||
|
||||
Lend2: shrl %cl,%edx /* compute most significant limb */
|
||||
movl %edx,(%edi) /* store it */
|
||||
|
||||
popl %eax /* pop carry limb */
|
||||
|
||||
popl %ebp
|
||||
popl %ebx
|
||||
popl %esi
|
||||
popl %edi
|
||||
ret
|
||||
|
||||
/* We loop from most significant end of the arrays, which is only
|
||||
permissible if no store can clobber a source limb that is still unread; this
|
||||
matters since the function is documented to work for overlapping operands.
|
||||
*/
|
||||
|
||||
Lspecial:
|
||||
leal -4(%edi,%ebp,4),%edi /* point at the last (most significant) limb of the result */
|
||||
leal -4(%esi,%ebp,4),%esi /* point at the last (most significant) limb of the source */
|
||||
|
||||
movl (%esi),%edx
|
||||
subl $4,%esi
|
||||
|
||||
decl %ebp /* size-1 limbs go through the loops */
|
||||
pushl %ebp
|
||||
shrl $3,%ebp /* number of 8-limb groups */
|
||||
|
||||
shrl $1,%edx /* shift the top limb; the bit shifted out is now in the carry flag */
|
||||
incl %ebp /* incl/decl pair: test %ebp for zero without changing the carry flag */
|
||||
decl %ebp
|
||||
jz LLend
|
||||
|
||||
movl (%edi),%eax /* fetch destination cache line */
|
||||
|
||||
ALIGN (2)
|
||||
LLoop: movl -28(%edi),%eax /* fetch destination cache line */
|
||||
movl %edx,%ebx /* %ebx = already shifted limb waiting to be stored */
|
||||
|
||||
movl (%esi),%eax
|
||||
movl -4(%esi),%edx
|
||||
rcrl $1,%eax /* rotate the pending carry in at the top, next carry out at the bottom */
|
||||
movl %ebx,(%edi)
|
||||
rcrl $1,%edx
|
||||
movl %eax,-4(%edi)
|
||||
|
||||
movl -8(%esi),%ebx
|
||||
movl -12(%esi),%eax
|
||||
rcrl $1,%ebx
|
||||
movl %edx,-8(%edi)
|
||||
rcrl $1,%eax
|
||||
movl %ebx,-12(%edi)
|
||||
|
||||
movl -16(%esi),%edx
|
||||
movl -20(%esi),%ebx
|
||||
rcrl $1,%edx
|
||||
movl %eax,-16(%edi)
|
||||
rcrl $1,%ebx
|
||||
movl %edx,-20(%edi)
|
||||
|
||||
movl -24(%esi),%eax
|
||||
movl -28(%esi),%edx
|
||||
rcrl $1,%eax
|
||||
movl %ebx,-24(%edi)
|
||||
rcrl $1,%edx /* %edx is shifted but not stored yet; the next step stores it */
|
||||
movl %eax,-28(%edi)
|
||||
|
||||
leal -32(%esi),%esi /* use leal not to clobber carry */
|
||||
leal -32(%edi),%edi
|
||||
decl %ebp /* decl leaves the carry flag alone */
|
||||
jnz LLoop
|
||||
|
||||
LLend: popl %ebp /* size-1 again */
|
||||
sbbl %eax,%eax /* save carry in %eax */
|
||||
andl $7,%ebp /* limbs left over after the 8-limb groups (andl clobbers carry) */
|
||||
jz LLend2
|
||||
addl %eax,%eax /* restore carry from eax */
|
||||
LLoop2: movl %edx,%ebx
|
||||
movl (%esi),%edx
|
||||
rcrl $1,%edx
|
||||
movl %ebx,(%edi)
|
||||
|
||||
leal -4(%esi),%esi /* use leal not to clobber carry */
|
||||
leal -4(%edi),%edi
|
||||
decl %ebp
|
||||
jnz LLoop2
|
||||
|
||||
jmp LL1
|
||||
LLend2: addl %eax,%eax /* restore carry from eax */
|
||||
LL1: movl %edx,(%edi) /* store last limb */
|
||||
|
||||
movl $0,%eax /* movl instead of xorl so the carry flag survives */
|
||||
rcrl $1,%eax /* return value: the bit shifted out, placed in bit 31 */
|
||||
|
||||
popl %ebp
|
||||
popl %ebx
|
||||
popl %esi
|
||||
popl %edi
|
||||
ret
|
334
sysdeps/i386/i586/strchr.S
Normal file
334
sysdeps/i386/i586/strchr.S
Normal file
@ -0,0 +1,334 @@
|
||||
/* strchr -- find character CH in a NUL terminated string.
|
||||
Highly optimized version for ix86, x>=5.
|
||||
Copyright (C) 1995 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public
|
||||
License along with the GNU C Library; see the file COPYING.LIB. If
|
||||
not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
/* This version is especially optimized for the i586 (and following?)
|
||||
processors. This is mainly done by using the two pipelines. The
|
||||
version optimized for i486 is weak in this aspect because to get
|
||||
as much parallelism we have to execute some *more* instructions.
|
||||
|
||||
The code below is structured to reflect the pairing of the instructions
|
||||
as *I think* it is. I have no processor data book to verify this.
|
||||
If you find something you think is incorrect let me know. */
|
||||
|
||||
|
||||
/* The magic value which is used throughout in the whole code. */
|
||||
#define magic 0xfefefeff
|
||||
|
||||
/*
|
||||
INPUT PARAMETERS:
|
||||
str (sp + 4)
|
||||
ch (sp + 8)
|
||||
*/
|
||||
|
||||
.text
|
||||
ENTRY (strchr)
|
||||
pushl %edi /* Save callee-saved registers. */
|
||||
pushl %esi
|
||||
|
||||
pushl %ebx
|
||||
pushl %ebp
|
||||
|
||||
movl 20(%esp), %eax /* get string pointer */
|
||||
movl 24(%esp), %edx /* get character we are looking for */
|
||||
|
||||
movl %eax, %edi /* duplicate string pointer for later */
|
||||
xorl %ecx, %ecx /* clear %ecx */
|
||||
|
||||
/* At the moment %edx contains C. What we need for the
|
||||
algorithm is C in all bytes of the dword. Avoid
|
||||
operations on 16 bit words because these require a
|
||||
prefix byte (and one more cycle). */
|
||||
movb %dl, %dh /* now it is 0|0|c|c */
|
||||
movb %dl, %cl /* we construct the lower half in %ecx */
|
||||
|
||||
shll $16, %edx /* now %edx is c|c|0|0 */
|
||||
movb %cl, %ch /* now %ecx is 0|0|c|c */
|
||||
|
||||
orl %ecx, %edx /* and finally c|c|c|c */
|
||||
andl $3, %edi /* mask alignment bits */
|
||||
|
||||
jz L11 /* alignment is 0 => start loop */
|
||||
|
||||
movb (%eax), %cl /* load single byte */
|
||||
cmpb %cl, %dl /* is byte == C? */
|
||||
|
||||
je L2 /* yes => return pointer */
|
||||
|
||||
cmp $0, %cl /* is byte NUL? */
|
||||
je L3 /* yes => return NULL */
|
||||
|
||||
incl %eax /* increment pointer */
|
||||
cmp $3, %edi /* was alignment == 3? */
|
||||
|
||||
je L11 /* yes => start loop */
|
||||
|
||||
movb (%eax), %cl /* load single byte */
|
||||
cmpb %cl, %dl /* is byte == C? */
|
||||
|
||||
je L2 /* yes => return pointer */
|
||||
|
||||
cmp $0, %cl /* is byte NUL? */
|
||||
je L3 /* yes => return NULL */
|
||||
|
||||
incl %eax /* increment pointer */
|
||||
cmp $2, %edi /* was alignment == 2? */
|
||||
|
||||
je L11 /* yes => start loop */
|
||||
|
||||
movb (%eax), %cl /* load single byte */
|
||||
cmpb %cl, %dl /* is byte == C? */
|
||||
|
||||
je L2 /* yes => return pointer */
|
||||
|
||||
cmp $0, %cl /* is byte NUL? */
|
||||
je L3 /* yes => return NULL */
|
||||
|
||||
incl %eax /* increment pointer */
|
||||
|
||||
/* The following code is the preparation for the loop. The
|
||||
four instructions up to `L1' will not be executed in the loop
|
||||
because the same code is found at the end of the loop, but
|
||||
there it is executed in parallel with other instructions. */
|
||||
L11: movl (%eax), %ecx /* get first word to examine */
|
||||
movl $magic, %ebp
|
||||
|
||||
movl $magic, %edi
|
||||
addl %ecx, %ebp /* %ebp = word+magic */
|
||||
|
||||
/* The main loop: it looks complex and indeed it is. I would
|
||||
love to say `it was hard to write, so it should be hard to
|
||||
read' but I will give some more hints. To fully understand
|
||||
this code you should first take a look at the i486 version.
|
||||
The basic algorithm is the same, but here the code is organized
|
||||
in a way which permits to use both pipelines all the time.
|
||||
|
||||
I tried to make it a bit more understandable by indenting
|
||||
the code according to stage in the algorithm. It goes as
|
||||
follows:
|
||||
check for 0 in 1st word
|
||||
check for C in 1st word
|
||||
check for 0 in 2nd word
|
||||
check for C in 2nd word
|
||||
check for 0 in 3rd word
|
||||
check for C in 3rd word
|
||||
check for 0 in 4th word
|
||||
check for C in 4th word
|
||||
|
||||
Please note that doing the test for NUL before the test for
|
||||
C allows us to overlap the test for 0 in the next word with
|
||||
the test for C. */
|
||||
|
||||
L1: xorl %ecx, %ebp /* (word+magic)^word */
|
||||
addl %ecx, %edi /* add magic word */
|
||||
|
||||
leal 4(%eax), %eax /* increment pointer */
|
||||
jnc L4 /* previous addl caused overflow? */
|
||||
|
||||
movl %ecx, %ebx /* duplicate original word */
|
||||
orl $magic, %ebp /* ((word+magic)^word)|magic */
|
||||
|
||||
addl $1, %ebp /* ((word+magic)^word)|magic == 0xffffffff? */
|
||||
jne L4 /* no => we found word with NUL */
|
||||
|
||||
movl $magic, %esi /* load magic value */
|
||||
xorl %edx, %ebx /* clear bytes which are C */
|
||||
|
||||
movl (%eax), %ecx /* already fetch the next word */
|
||||
addl %ebx, %esi /* (word+magic) */
|
||||
|
||||
movl $magic, %edi
|
||||
jnc L5 /* previous addl caused overflow? */
|
||||
|
||||
movl %edi, %ebp
|
||||
xorl %ebx, %esi /* (word+magic)^word */
|
||||
|
||||
addl %ecx, %ebp /* start the NUL test of the next word */
|
||||
orl $magic, %esi /* ((word+magic)^word)|magic */
|
||||
|
||||
addl $1, %esi /* ((word+magic)^word)|magic==0xf..f?*/
|
||||
jne L5 /* no => we found word with C */
|
||||
|
||||
xorl %ecx, %ebp /* 2nd word: same steps as for the 1st word */
|
||||
addl %ecx, %edi
|
||||
|
||||
leal 4(%eax), %eax
|
||||
jnc L4
|
||||
|
||||
movl %ecx, %ebx
|
||||
orl $magic, %ebp
|
||||
|
||||
addl $1, %ebp
|
||||
jne L4
|
||||
|
||||
movl $magic, %esi
|
||||
xorl %edx, %ebx
|
||||
|
||||
movl (%eax), %ecx
|
||||
addl %ebx, %esi
|
||||
|
||||
movl $magic, %edi
|
||||
jnc L5
|
||||
|
||||
movl %edi, %ebp
|
||||
xorl %ebx, %esi
|
||||
|
||||
addl %ecx, %ebp
|
||||
orl $magic, %esi
|
||||
|
||||
addl $1, %esi
|
||||
jne L5
|
||||
|
||||
xorl %ecx, %ebp /* 3rd word: same steps as for the 1st word */
|
||||
addl %ecx, %edi
|
||||
|
||||
leal 4(%eax), %eax
|
||||
jnc L4
|
||||
|
||||
movl %ecx, %ebx
|
||||
orl $magic, %ebp
|
||||
|
||||
addl $1, %ebp
|
||||
jne L4
|
||||
|
||||
movl $magic, %esi
|
||||
xorl %edx, %ebx
|
||||
|
||||
movl (%eax), %ecx
|
||||
addl %ebx, %esi
|
||||
|
||||
movl $magic, %edi
|
||||
jnc L5
|
||||
|
||||
movl %edi, %ebp
|
||||
xorl %ebx, %esi
|
||||
|
||||
addl %ecx, %ebp
|
||||
orl $magic, %esi
|
||||
|
||||
addl $1, %esi
|
||||
jne L5
|
||||
|
||||
xorl %ecx, %ebp /* 4th word: same steps as for the 1st word */
|
||||
addl %ecx, %edi
|
||||
|
||||
leal 4(%eax), %eax
|
||||
jnc L4
|
||||
|
||||
movl %ecx, %ebx
|
||||
orl $magic, %ebp
|
||||
|
||||
addl $1, %ebp
|
||||
jne L4
|
||||
|
||||
movl $magic, %esi
|
||||
xorl %edx, %ebx
|
||||
|
||||
movl (%eax), %ecx
|
||||
addl %ebx, %esi
|
||||
|
||||
movl $magic, %edi
|
||||
jnc L5
|
||||
|
||||
movl %edi, %ebp
|
||||
xorl %ebx, %esi
|
||||
|
||||
addl %ecx, %ebp
|
||||
orl $magic, %esi
|
||||
|
||||
addl $1, %esi
|
||||
|
||||
je L1 /* no C in the 4th word => continue with the next four words */
|
||||
|
||||
/* We know there is no NUL byte but a C byte in the word.
|
||||
%ebx contains NUL in this particular byte (it holds word^c|c|c|c). */
|
||||
L5: subl $4, %eax /* adjust pointer */
|
||||
testb %bl, %bl /* first byte == C? */
|
||||
|
||||
jz L2 /* yes => return pointer */
|
||||
|
||||
incl %eax /* increment pointer */
|
||||
testb %bh, %bh /* second byte == C? */
|
||||
|
||||
jz L2 /* yes => return pointer */
|
||||
|
||||
shrl $16, %ebx /* make upper bytes accessible */
|
||||
incl %eax /* increment pointer */
|
||||
|
||||
cmp $0, %bl /* third byte == C? */
|
||||
je L2 /* yes => return pointer */
|
||||
|
||||
incl %eax /* increment pointer: it must be the fourth byte */
|
||||
|
||||
L2: popl %ebp /* restore saved registers */
|
||||
popl %ebx
|
||||
|
||||
popl %esi
|
||||
popl %edi
|
||||
|
||||
ret
|
||||
|
||||
/* We know there is a NUL byte in the word. But we have to test
|
||||
whether there is a C byte before it in the word. */
|
||||
L4: subl $4, %eax /* adjust pointer */
|
||||
cmpb %dl, %cl /* first byte == C? */
|
||||
|
||||
je L2 /* yes => return pointer */
|
||||
|
||||
cmpb $0, %cl /* first byte == NUL? */
|
||||
je L3 /* yes => return NULL */
|
||||
|
||||
incl %eax /* increment pointer */
|
||||
|
||||
cmpb %dl, %ch /* second byte == C? */
|
||||
je L2 /* yes => return pointer */
|
||||
|
||||
cmpb $0, %ch /* second byte == NUL? */
|
||||
je L3 /* yes => return NULL */
|
||||
|
||||
shrl $16, %ecx /* make upper bytes accessible */
|
||||
incl %eax /* increment pointer */
|
||||
|
||||
cmpb %dl, %cl /* third byte == C? */
|
||||
je L2 /* yes => return pointer */
|
||||
|
||||
cmpb $0, %cl /* third byte == NUL? */
|
||||
je L3 /* yes => return NULL */
|
||||
|
||||
incl %eax /* increment pointer */
|
||||
|
||||
/* The test for the fourth byte is necessary (C may itself be NUL)! */
|
||||
cmpb %dl, %ch /* fourth byte == C? */
|
||||
je L2 /* yes => return pointer */
|
||||
|
||||
L3: xorl %eax, %eax /* set return value = NULL */
|
||||
|
||||
popl %ebp /* restore saved registers */
|
||||
popl %ebx
|
||||
|
||||
popl %esi
|
||||
popl %edi
|
||||
|
||||
ret
|
||||
|
||||
#undef index
|
||||
weak_alias (strchr, index)
|
185
sysdeps/i386/i586/strlen.S
Normal file
185
sysdeps/i386/i586/strlen.S
Normal file
@ -0,0 +1,185 @@
|
||||
/* strlen -- Compute length of NUL terminated string.
|
||||
Highly optimized version for ix86, x>=5.
|
||||
Copyright (C) 1995 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public
|
||||
License along with the GNU C Library; see the file COPYING.LIB. If
|
||||
not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
/* This version is especially optimized for the i586 (and following?)
|
||||
processors. This is mainly done by using the two pipelines. The
|
||||
version optimized for i486 is weak in this aspect because to get
|
||||
as much parallelism we have to execute some *more* instructions.
|
||||
|
||||
The code below is structured to reflect the pairing of the instructions
|
||||
as *I think* it is. I have no processor data book to verify this.
|
||||
If you find something you think is incorrect let me know. */
|
||||
|
||||
|
||||
/* The magic value which is used throughout in the whole code. */
|
||||
#define magic 0xfefefeff
|
||||
|
||||
/*
|
||||
INPUT PARAMETERS:
|
||||
str (sp + 4)
|
||||
*/
|
||||
|
||||
.text
|
||||
ENTRY(strlen)
|
||||
/* Returns in %eax the number of bytes before the first NUL of the
|
||||
string whose address is the only stack argument. Only %eax, %ecx
|
||||
and %edx are modified. */
|
||||
movl 4(%esp), %eax /* get string pointer */
|
||||
|
||||
movl %eax, %ecx /* duplicate it */
|
||||
andl $3, %ecx /* mask alignment bits; this also makes %ch == 0 */
|
||||
|
||||
jz L11 /* aligned => start loop */
|
||||
|
||||
cmpb %ch, (%eax) /* is byte NUL? */
|
||||
je L2 /* yes => return */
|
||||
|
||||
incl %eax /* increment pointer */
|
||||
cmpl $3, %ecx /* was alignment = 3? */
|
||||
|
||||
je L11 /* yes => now it is aligned and start loop */
|
||||
|
||||
cmpb %ch, (%eax) /* is byte NUL? */
|
||||
je L2 /* yes => return */
|
||||
|
||||
incl %eax /* increment pointer */
|
||||
cmpl $2, %ecx /* was alignment = 2? */
|
||||
|
||||
je L11 /* yes => now it is aligned and start loop */
|
||||
|
||||
cmpb %ch, (%eax) /* is byte NUL? */
|
||||
je L2 /* yes => return */
|
||||
|
||||
incl %eax /* increment pointer */
|
||||
|
||||
/* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
|
||||
change any of the hole bits of LONGWORD.
|
||||
|
||||
1) Is this safe? Will it catch all the zero bytes?
|
||||
Suppose there is a byte with all zeros. Any carry bits
|
||||
propagating from its left will fall into the hole at its
|
||||
least significant bit and stop. Since there will be no
|
||||
carry from its most significant bit, the LSB of the
|
||||
byte to the left will be unchanged, and the zero will be
|
||||
detected.
|
||||
|
||||
2) Is this worthwhile? Will it ignore everything except
|
||||
zero bytes? Suppose every byte of LONGWORD has a bit set
|
||||
somewhere. There will be a carry into bit 8. If bit 8
|
||||
is set, this will carry into bit 16. If bit 8 is clear,
|
||||
one of bits 9-15 must be set, so there will be a carry
|
||||
into bit 16. Similarly, there will be a carry into bit
|
||||
24. If one of bits 24-31 is set, there will be a carry
|
||||
into bit 32 (=carry flag), so all of the hole bits will
|
||||
be changed. */
|
||||
L11: xorl %edx, %edx /* We need %edx == 0 for later */
|
||||
|
||||
/* Every step below leaves the loop with %ecx == word+magic, no
|
||||
matter which of its two exits is taken; the original word is
|
||||
restored once at L3. (Restoring it inside the loop after the
|
||||
`jnc' left the carry exit with a modified word, so words like
|
||||
'a','b',0,0 produced a wrong length.) When a step falls through,
|
||||
the `andl' has left %edx == 0 again for the next step. */
|
||||
L1:
|
||||
movl (%eax), %ecx /* get word (= 4 bytes) in question */
|
||||
addl $4, %eax /* adjust pointer for *next* word */
|
||||
|
||||
subl %ecx, %edx /* first step to negate word */
|
||||
addl $magic, %ecx /* add magic word */
|
||||
|
||||
decl %edx /* complete negation of word: %edx = ~word */
|
||||
jnc L3 /* previous addl caused overflow? */
|
||||
|
||||
xorl %ecx, %edx /* (word+magic)^~word */
|
||||
|
||||
andl $~magic, %edx /* any hole bit left unchanged? */
|
||||
|
||||
jne L3 /* yes => determine byte */
|
||||
|
||||
|
||||
movl (%eax), %ecx /* get word (= 4 bytes) in question */
|
||||
addl $4, %eax /* adjust pointer for *next* word */
|
||||
|
||||
subl %ecx, %edx /* first step to negate word */
|
||||
addl $magic, %ecx /* add magic word */
|
||||
|
||||
decl %edx /* complete negation of word: %edx = ~word */
|
||||
jnc L3 /* previous addl caused overflow? */
|
||||
|
||||
xorl %ecx, %edx /* (word+magic)^~word */
|
||||
|
||||
andl $~magic, %edx /* any hole bit left unchanged? */
|
||||
|
||||
jne L3 /* yes => determine byte */
|
||||
|
||||
|
||||
movl (%eax), %ecx /* get word (= 4 bytes) in question */
|
||||
addl $4, %eax /* adjust pointer for *next* word */
|
||||
|
||||
subl %ecx, %edx /* first step to negate word */
|
||||
addl $magic, %ecx /* add magic word */
|
||||
|
||||
decl %edx /* complete negation of word: %edx = ~word */
|
||||
jnc L3 /* previous addl caused overflow? */
|
||||
|
||||
xorl %ecx, %edx /* (word+magic)^~word */
|
||||
|
||||
andl $~magic, %edx /* any hole bit left unchanged? */
|
||||
|
||||
jne L3 /* yes => determine byte */
|
||||
|
||||
|
||||
movl (%eax), %ecx /* get word (= 4 bytes) in question */
|
||||
addl $4, %eax /* adjust pointer for *next* word */
|
||||
|
||||
subl %ecx, %edx /* first step to negate word */
|
||||
addl $magic, %ecx /* add magic word */
|
||||
|
||||
decl %edx /* complete negation of word: %edx = ~word */
|
||||
jnc L3 /* previous addl caused overflow? */
|
||||
|
||||
xorl %ecx, %edx /* (word+magic)^~word */
|
||||
|
||||
andl $~magic, %edx /* any hole bit left unchanged? */
|
||||
|
||||
je L1 /* no => start loop again */
|
||||
|
||||
|
||||
L3: subl $4, %eax /* correct too early pointer increment */
|
||||
subl $magic, %ecx /* undo the addl to restore the original word */
|
||||
|
||||
testb %cl, %cl /* lowest byte NUL? */
|
||||
jz L2 /* yes => return */
|
||||
|
||||
incl %eax /* increment pointer */
|
||||
testb %ch, %ch /* second byte NUL? */
|
||||
|
||||
jz L2 /* yes => return */
|
||||
|
||||
shrl $16, %ecx /* make upper bytes accessible */
|
||||
incl %eax /* increment pointer */
|
||||
|
||||
cmpb $0, %cl /* is third byte NUL? */
|
||||
jz L2 /* yes => return */
|
||||
|
||||
incl %eax /* increment pointer: it must be the fourth byte */
|
||||
|
||||
L2: subl 4(%esp), %eax /* now compute the length as difference
|
||||
between start and terminating NUL
|
||||
character */
|
||||
|
||||
ret
|
136
sysdeps/i386/i586/sub_n.S
Normal file
136
sysdeps/i386/i586/sub_n.S
Normal file
@ -0,0 +1,136 @@
|
||||
/* Pentium __mpn_sub_n -- Subtract two limb vectors of the same length > 0
|
||||
and store difference in a third limb vector.
|
||||
|
||||
Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU MP Library.
|
||||
|
||||
The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Library General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public License
|
||||
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||
|
||||
/*
|
||||
INPUT PARAMETERS
|
||||
res_ptr (sp + 4)
|
||||
s1_ptr (sp + 8)
|
||||
s2_ptr (sp + 12)
|
||||
size (sp + 16)
|
||||
*/
|
||||
|
||||
#define r1 %eax
|
||||
#define r2 %edx
|
||||
#define src1 %esi
|
||||
#define src2 %ebp
|
||||
#define dst %edi
|
||||
#define x %ebx
|
||||
|
||||
#include "sysdep.h"
|
||||
#include "asm-syntax.h"
|
||||
|
||||
.text
|
||||
ALIGN (3)
|
||||
.globl C_SYMBOL_NAME(__mpn_sub_n)
|
||||
C_SYMBOL_NAME(__mpn_sub_n:)
|
||||
pushl %edi /* save the four registers overwritten below; restored before ret */
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
pushl %ebp
|
||||
|
||||
movl 20(%esp),dst /* res_ptr */
|
||||
movl 24(%esp),src1 /* s1_ptr */
|
||||
movl 28(%esp),src2 /* s2_ptr */
|
||||
movl 32(%esp),%ecx /* size */
|
||||
|
||||
movl (src2),x /* the s2 limb is always loaded one step ahead of its use */
|
||||
|
||||
decl %ecx /* size-1 limbs go through the loops; the last one is done at Lend2 */
|
||||
movl %ecx,r2
|
||||
shrl $3,%ecx /* number of 8-limb groups */
|
||||
andl $7,r2 /* limbs left over after the 8-limb groups */
|
||||
testl %ecx,%ecx /* zero carry flag */
|
||||
jz Lend
|
||||
pushl r2 /* r2 is used as scratch inside Loop */
|
||||
|
||||
ALIGN (3)
|
||||
Loop: movl 28(dst),%eax /* fetch destination cache line */
|
||||
leal 32(dst),dst /* leal, not addl: the borrow in the carry flag must survive */
|
||||
|
||||
L1: movl (src1),r1
|
||||
movl 4(src1),r2
|
||||
sbbl x,r1 /* r1 = s1 limb - s2 limb - borrow */
|
||||
movl 4(src2),x
|
||||
sbbl x,r2
|
||||
movl 8(src2),x
|
||||
movl r1,-32(dst) /* negative offsets because dst was already advanced by 32 */
|
||||
movl r2,-28(dst)
|
||||
|
||||
L2: movl 8(src1),r1
|
||||
movl 12(src1),r2
|
||||
sbbl x,r1
|
||||
movl 12(src2),x
|
||||
sbbl x,r2
|
||||
movl 16(src2),x
|
||||
movl r1,-24(dst)
|
||||
movl r2,-20(dst)
|
||||
|
||||
L3: movl 16(src1),r1
|
||||
movl 20(src1),r2
|
||||
sbbl x,r1
|
||||
movl 20(src2),x
|
||||
sbbl x,r2
|
||||
movl 24(src2),x
|
||||
movl r1,-16(dst)
|
||||
movl r2,-12(dst)
|
||||
|
||||
L4: movl 24(src1),r1
|
||||
movl 28(src1),r2
|
||||
sbbl x,r1
|
||||
movl 28(src2),x
|
||||
sbbl x,r2
|
||||
movl 32(src2),x /* first s2 limb of the next step */
|
||||
movl r1,-8(dst)
|
||||
movl r2,-4(dst)
|
||||
|
||||
leal 32(src1),src1
|
||||
leal 32(src2),src2
|
||||
decl %ecx /* decl leaves the carry flag alone */
|
||||
jnz Loop
|
||||
|
||||
popl r2 /* leftover limb count saved before Loop */
|
||||
Lend:
|
||||
decl r2 /* test r2 w/o clobbering carry */
|
||||
js Lend2 /* r2 was 0 => only the final limb is left */
|
||||
incl r2
|
||||
Loop2:
|
||||
leal 4(dst),dst
|
||||
movl (src1),r1
|
||||
sbbl x,r1
|
||||
movl 4(src2),x
|
||||
movl r1,-4(dst)
|
||||
leal 4(src1),src1
|
||||
leal 4(src2),src2
|
||||
decl r2
|
||||
jnz Loop2
|
||||
Lend2:
|
||||
movl (src1),r1 /* final limb; its s2 operand is already in x */
|
||||
sbbl x,r1
|
||||
movl r1,(dst)
|
||||
|
||||
sbbl %eax,%eax /* %eax = 0 or -1 depending on the final borrow */
|
||||
negl %eax /* return value: 0 or 1 */
|
||||
|
||||
popl %ebp
|
||||
popl %ebx
|
||||
popl %esi
|
||||
popl %edi
|
||||
ret
|
82
sysdeps/i386/i586/submul_1.S
Normal file
82
sysdeps/i386/i586/submul_1.S
Normal file
@ -0,0 +1,82 @@
|
||||
/* Pentium __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
|
||||
the result from a second limb vector.
|
||||
|
||||
Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU MP Library.
|
||||
|
||||
The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Library General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public License
|
||||
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||
|
||||
/*
|
||||
INPUT PARAMETERS
|
||||
res_ptr (sp + 4)
|
||||
s1_ptr (sp + 8)
|
||||
size (sp + 12)
|
||||
s2_limb (sp + 16)
|
||||
*/
|
||||
|
||||
#include "sysdep.h"
|
||||
#include "asm-syntax.h"
|
||||
|
||||
#define res_ptr edi
|
||||
#define s1_ptr esi
|
||||
#define size ecx
|
||||
#define s2_limb ebp
|
||||
|
||||
TEXT
|
||||
ALIGN (3)
|
||||
GLOBL C_SYMBOL_NAME(__mpn_submul_1)
|
||||
C_SYMBOL_NAME(__mpn_submul_1:)
|
||||
|
||||
INSN1(push,l ,R(edi)) /* save the four registers overwritten below */
|
||||
INSN1(push,l ,R(esi))
|
||||
INSN1(push,l ,R(ebx))
|
||||
INSN1(push,l ,R(ebp))
|
||||
|
||||
INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) /* INSN2 is used with the destination as first operand */
|
||||
INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24))
|
||||
INSN2(mov,l ,R(size),MEM_DISP(esp,28))
|
||||
INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32))
|
||||
|
||||
INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) /* point both pointers past the last limb ... */
|
||||
INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
|
||||
INSN1(neg,l ,R(size)) /* ... and index with -size counting up to 0 */
|
||||
INSN2(xor,l ,R(edx),R(edx)) /* carry limb starts at 0 */
|
||||
ALIGN (3)
|
||||
Loop:
|
||||
INSN2(mov,l ,R(ebx),R(edx)) /* ebx = carry limb from the previous step */
|
||||
INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4)) /* eax = next s1 limb */
|
||||
|
||||
INSN1(mul,l ,R(s2_limb)) /* edx:eax = s1 limb * s2_limb */
|
||||
|
||||
INSN2(add,l ,R(eax),R(ebx)) /* add the incoming carry limb to the low half */
|
||||
INSN2(mov,l ,R(ebx),MEM_INDEX(res_ptr,size,4)) /* ebx = current result limb */
|
||||
|
||||
INSN2(adc,l ,R(edx),$0) /* carry of the add goes into the high half */
|
||||
INSN2(sub,l ,R(ebx),R(eax)) /* result limb - low half */
|
||||
|
||||
INSN2(adc,l ,R(edx),$0) /* borrow of the sub goes into the high half as well */
|
||||
INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx)) /* store the updated result limb */
|
||||
|
||||
INSN1(inc,l ,R(size))
|
||||
INSN1(jnz, ,Loop) /* loop until the negative index reaches 0 */
|
||||
|
||||
|
||||
INSN2(mov,l ,R(eax),R(edx)) /* return value: the final carry limb */
|
||||
INSN1(pop,l ,R(ebp))
|
||||
INSN1(pop,l ,R(ebx))
|
||||
INSN1(pop,l ,R(esi))
|
||||
INSN1(pop,l ,R(edi))
|
||||
ret
|
315
sysdeps/i386/memchr.S
Normal file
315
sysdeps/i386/memchr.S
Normal file
@ -0,0 +1,315 @@
|
||||
/* memchr (str, ch, n) -- Return pointer to first occurrence of CH in STR less
|
||||
than N.
|
||||
For Intel 80x86, x>=3.
|
||||
Copyright (C) 1994, 1995 Free Software Foundation, Inc.
|
||||
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>
|
||||
Optimised a little by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
This version is developed using the same algorithm as the fast C
|
||||
version which carries the following introduction:
|
||||
|
||||
Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
|
||||
with help from Dan Sahlin (dan@sics.se) and
|
||||
commentary by Jim Blandy (jimb@ai.mit.edu);
|
||||
adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
|
||||
and implemented by Roland McGrath (roland@ai.mit.edu).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public
|
||||
License along with the GNU C Library; see the file COPYING.LIB. If
|
||||
not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include "asm-syntax.h"
|
||||
|
||||
/*
|
||||
INPUT PARAMETERS:
|
||||
str (sp + 4)
|
||||
c (sp + 8)
|
||||
len (sp + 12)
|
||||
*/
|
||||
|
||||
.text
|
||||
ENTRY (memchr)
|
||||
/* Save callee-safe registers used in this function. */
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
|
||||
/* Load parameters into registers. */
|
||||
movl 12(%esp), %eax /* str: pointer to memory block. */
|
||||
movl 16(%esp), %edx /* c: byte we are looking for. */
|
||||
movl 20(%esp), %esi /* len: length of memory block. */
|
||||
|
||||
/* If we must not test more than three characters test
|
||||
them one by one. This is especially true for 0. */
|
||||
cmpl $4, %esi
|
||||
jb L3
|
||||
|
||||
/* At the moment %edx contains C. What we need for the
|
||||
algorithm is C in all bytes of the dword. Avoid
|
||||
operations on 16 bit words because these require an
|
||||
prefix byte (and one more cycle). */
|
||||
movb %dl, %dh /* Now it is 0|0|c|c */
|
||||
movl %edx, %ecx
|
||||
shll $16, %edx /* Now c|c|0|0 */
|
||||
movw %cx, %dx /* And finally c|c|c|c */
|
||||
|
||||
/* Better performance can be achieved if the word (32
|
||||
bit) memory access is aligned on a four-byte-boundary.
|
||||
So process first bytes one by one until boundary is
|
||||
reached. Don't use a loop for better performance. */
|
||||
|
||||
testb $3, %eax /* correctly aligned ? */
|
||||
je L2 /* yes => begin loop */
|
||||
cmpb %dl, (%eax) /* compare byte */
|
||||
je L9 /* target found => return */
|
||||
incl %eax /* increment source pointer */
|
||||
decl %esi /* decrement length counter */
|
||||
je L4 /* len==0 => return NULL */
|
||||
|
||||
testb $3, %eax /* correctly aligned ? */
|
||||
je L2 /* yes => begin loop */
|
||||
cmpb %dl, (%eax) /* compare byte */
|
||||
je L9 /* target found => return */
|
||||
incl %eax /* increment source pointer */
|
||||
decl %esi /* decrement length counter */
|
||||
je L4 /* len==0 => return NULL */
|
||||
|
||||
testb $3, %eax /* correctly aligned ? */
|
||||
je L2 /* yes => begin loop */
|
||||
cmpb %dl, (%eax) /* compare byte */
|
||||
je L9 /* target found => return */
|
||||
incl %eax /* increment source pointer */
|
||||
decl %esi /* decrement length counter */
|
||||
/* no test for len==0 here, because this is done in the
|
||||
loop head */
|
||||
jmp L2
|
||||
|
||||
/* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
|
||||
change any of the hole bits of LONGWORD.
|
||||
|
||||
1) Is this safe? Will it catch all the zero bytes?
|
||||
Suppose there is a byte with all zeros. Any carry bits
|
||||
propagating from its left will fall into the hole at its
|
||||
least significant bit and stop. Since there will be no
|
||||
carry from its most significant bit, the LSB of the
|
||||
byte to the left will be unchanged, and the zero will be
|
||||
detected.
|
||||
|
||||
2) Is this worthwhile? Will it ignore everything except
|
||||
zero bytes? Suppose every byte of LONGWORD has a bit set
|
||||
somewhere. There will be a carry into bit 8. If bit 8
|
||||
is set, this will carry into bit 16. If bit 8 is clear,
|
||||
one of bits 9-15 must be set, so there will be a carry
|
||||
into bit 16. Similarly, there will be a carry into bit
|
||||
24. If one of bits 24-31 is set, there will be a carry
|
||||
into bit 32 (=carry flag), so all of the hole bits will
|
||||
be changed.
|
||||
|
||||
3) But wait! Aren't we looking for C, not zero?
|
||||
Good point. So what we do is XOR LONGWORD with a longword,
|
||||
each of whose bytes is C. This turns each byte that is C
|
||||
into a zero. */
|
||||
|
||||
|
||||
/* Each round the main loop processes 16 bytes. */
|
||||
|
||||
ALIGN (4)
|
||||
|
||||
L1: movl (%eax), %ecx /* get word (= 4 bytes) in question */
|
||||
movl $0xfefefeff, %edi /* magic value */
|
||||
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
|
||||
are now 0 */
|
||||
addl %ecx, %edi /* add the magic value to the word. We get
|
||||
carry bits reported for each byte which
|
||||
is *not* 0 */
|
||||
|
||||
/* According to the algorithm we had to reverse the effect of the
|
||||
XOR first and then test the overflow bits. But because the
|
||||
following XOR would destroy the carry flag and it would (in a
|
||||
representation with more than 32 bits) not alter the last
|
||||
overflow, we can now test this condition. If no carry is signaled
|
||||
no overflow must have occurred in the last byte => it was 0. */
|
||||
jnc L8
|
||||
|
||||
/* We are only interested in carry bits that change due to the
|
||||
previous add, so remove original bits */
|
||||
xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
|
||||
|
||||
/* Now test for the other three overflow bits. */
|
||||
orl $0xfefefeff, %edi /* set all non-carry bits */
|
||||
incl %edi /* add 1: if one carry bit was *not* set
|
||||
the addition will not result in 0. */
|
||||
|
||||
/* If at least one byte of the word is C we don't get 0 in %edi. */
|
||||
jnz L8 /* found it => return pointer */
|
||||
|
||||
/* This process is unfolded four times for better performance.
|
||||
we don't increment the source pointer each time. Instead we
|
||||
use offsets and increment by 16 in each run of the loop. But
|
||||
before probing for the matching byte we need some extra code
|
||||
(following LL(13) below). Even the len can be compared with
|
||||
constants instead of decrementing each time. */
|
||||
|
||||
movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
|
||||
movl $0xfefefeff, %edi /* magic value */
|
||||
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
|
||||
are now 0 */
|
||||
addl %ecx, %edi /* add the magic value to the word. We get
|
||||
carry bits reported for each byte which
|
||||
is *not* 0 */
|
||||
jnc L7 /* highest byte is C => return pointer */
|
||||
xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
|
||||
orl $0xfefefeff, %edi /* set all non-carry bits */
|
||||
incl %edi /* add 1: if one carry bit was *not* set
|
||||
the addition will not result in 0. */
|
||||
jnz L7 /* found it => return pointer */
|
||||
|
||||
movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
|
||||
movl $0xfefefeff, %edi /* magic value */
|
||||
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
|
||||
are now 0 */
|
||||
addl %ecx, %edi /* add the magic value to the word. We get
|
||||
carry bits reported for each byte which
|
||||
is *not* 0 */
|
||||
jnc L6 /* highest byte is C => return pointer */
|
||||
xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
|
||||
orl $0xfefefeff, %edi /* set all non-carry bits */
|
||||
incl %edi /* add 1: if one carry bit was *not* set
|
||||
the addition will not result in 0. */
|
||||
jnz L6 /* found it => return pointer */
|
||||
|
||||
movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
|
||||
movl $0xfefefeff, %edi /* magic value */
|
||||
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
|
||||
are now 0 */
|
||||
addl %ecx, %edi /* add the magic value to the word. We get
|
||||
carry bits reported for each byte which
|
||||
is *not* 0 */
|
||||
jnc L5 /* highest byte is C => return pointer */
|
||||
xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
|
||||
orl $0xfefefeff, %edi /* set all non-carry bits */
|
||||
incl %edi /* add 1: if one carry bit was *not* set
|
||||
the addition will not result in 0. */
|
||||
jnz L5 /* found it => return pointer */
|
||||
|
||||
/* Adjust both counters for a full round, i.e. 16 bytes. */
|
||||
addl $16, %eax
|
||||
L2: subl $16, %esi
|
||||
jae L1 /* Still more than 16 bytes remaining */
|
||||
|
||||
/* Process remaining bytes separately. */
|
||||
cmpl $4-16, %esi /* rest < 4 bytes? */
|
||||
jb L3 /* yes, than test byte by byte */
|
||||
|
||||
movl (%eax), %ecx /* get word (= 4 bytes) in question */
|
||||
movl $0xfefefeff, %edi /* magic value */
|
||||
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
|
||||
are now 0 */
|
||||
addl %ecx, %edi /* add the magic value to the word. We get
|
||||
carry bits reported for each byte which
|
||||
is *not* 0 */
|
||||
jnc L8 /* highest byte is C => return pointer */
|
||||
xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
|
||||
orl $0xfefefeff, %edi /* set all non-carry bits */
|
||||
incl %edi /* add 1: if one carry bit was *not* set
|
||||
the addition will not result in 0. */
|
||||
jne L8 /* found it => return pointer */
|
||||
addl $4, %eax /* adjust source pointer */
|
||||
|
||||
cmpl $8-16, %esi /* rest < 8 bytes? */
|
||||
jb L3 /* yes, than test byte by byte */
|
||||
|
||||
movl (%eax), %ecx /* get word (= 4 bytes) in question */
|
||||
movl $0xfefefeff, %edi /* magic value */
|
||||
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
|
||||
are now 0 */
|
||||
addl %ecx, %edi /* add the magic value to the word. We get
|
||||
carry bits reported for each byte which
|
||||
is *not* 0 */
|
||||
jnc L8 /* highest byte is C => return pointer */
|
||||
xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
|
||||
orl $0xfefefeff, %edi /* set all non-carry bits */
|
||||
incl %edi /* add 1: if one carry bit was *not* set
|
||||
the addition will not result in 0. */
|
||||
jne L8 /* found it => return pointer */
|
||||
addl $4, %eax /* adjust source pointer */
|
||||
|
||||
cmpl $12-16, %esi /* rest < 12 bytes? */
|
||||
jb L3 /* yes, than test byte by byte */
|
||||
|
||||
movl (%eax), %ecx /* get word (= 4 bytes) in question */
|
||||
movl $0xfefefeff, %edi /* magic value */
|
||||
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
|
||||
are now 0 */
|
||||
addl %ecx, %edi /* add the magic value to the word. We get
|
||||
carry bits reported for each byte which
|
||||
is *not* 0 */
|
||||
jnc L8 /* highest byte is C => return pointer */
|
||||
xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
|
||||
orl $0xfefefeff, %edi /* set all non-carry bits */
|
||||
incl %edi /* add 1: if one carry bit was *not* set
|
||||
the addition will not result in 0. */
|
||||
jne L8 /* found it => return pointer */
|
||||
addl $4, %eax /* adjust source pointer */
|
||||
|
||||
/* Check the remaining bytes one by one. */
|
||||
L3: andl $3, %esi /* mask out uninteresting bytes */
|
||||
jz L4 /* no remaining bytes => return NULL */
|
||||
|
||||
cmpb %dl, (%eax) /* compare byte with C */
|
||||
je L9 /* equal, than return pointer */
|
||||
incl %eax /* increment source pointer */
|
||||
decl %esi /* decrement length */
|
||||
jz L4 /* no remaining bytes => return NULL */
|
||||
|
||||
cmpb %dl, (%eax) /* compare byte with C */
|
||||
je L9 /* equal, than return pointer */
|
||||
incl %eax /* increment source pointer */
|
||||
decl %esi /* decrement length */
|
||||
jz L4 /* no remaining bytes => return NULL */
|
||||
|
||||
cmpb %dl, (%eax) /* compare byte with C */
|
||||
je L9 /* equal, than return pointer */
|
||||
|
||||
L4: /* no byte found => return NULL */
|
||||
xorl %eax, %eax
|
||||
jmp L9
|
||||
|
||||
/* add missing source pointer increments */
|
||||
L5: addl $4, %eax
|
||||
L6: addl $4, %eax
|
||||
L7: addl $4, %eax
|
||||
|
||||
/* Test for the matching byte in the word. %ecx contains a NUL
|
||||
char in the byte which originally was the byte we are looking
|
||||
at. */
|
||||
L8: testb %cl, %cl /* test first byte in dword */
|
||||
jz L9 /* if zero => return pointer */
|
||||
incl %eax /* increment source pointer */
|
||||
|
||||
testb %ch, %ch /* test second byte in dword */
|
||||
jz L9 /* if zero => return pointer */
|
||||
incl %eax /* increment source pointer */
|
||||
|
||||
testl $0xff0000, %ecx /* test third byte in dword */
|
||||
jz L9 /* if zero => return pointer */
|
||||
incl %eax /* increment source pointer */
|
||||
|
||||
/* No further test needed because we know it is one of the four bytes. */
|
||||
|
||||
L9: popl %edi /* pop saved registers */
|
||||
popl %esi
|
||||
|
||||
ret
|
@ -1,48 +0,0 @@
|
||||
/* memchr (str, ch, n) -- Return pointer to first occurrence of CH in STR less
|
||||
than N.
|
||||
For Intel 80x86, x>=3.
|
||||
Copyright (C) 1991, 1992, 1993 Free Software Foundation, Inc.
|
||||
Contributed by Torbjorn Granlund (tege@sics.se).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public
|
||||
License along with the GNU C Library; see the file COPYING.LIB. If
|
||||
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
|
||||
Cambridge, MA 02139, USA. */
|
||||
|
||||
#include <ansidecl.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef __GNUC__
|
||||
|
||||
/* memchr: scan at most LEN bytes starting at STR for the byte C using
   the x86 `repnz scasb' string instruction.  Returns a pointer to the
   first matching byte, or a null pointer when no byte matches.

   Operand map for the asm below (taken from the constraint list):
     %0 = eax -- holds C on entry (scasb compares against al) and
                 receives the result;
     %1 = edi -- scan pointer, advanced by scasb;
     %2 = ecx -- remaining byte count, decremented by the rep prefix.

   NOTE(review): `done' is an ordinary (non-local) assembler label, so
   the asm presumably cannot appear twice in one object file -- confirm
   before inlining or copying this function.  */
PTR
|
||||
DEFUN(memchr, (str, c, len),
|
||||
CONST PTR str AND int c AND size_t len)
|
||||
{
|
||||
PTR retval;
|
||||
asm("cld\n" /* Search forward. */
|
||||
"testl %1,%1\n" /* Clear Z flag, to handle LEN == 0: with a zero
   count scasb never executes, so the `jnz' below sees the flags left
   here.  NOTE(review): Z is only cleared when STR is non-null.  */
|
||||
/* Some old versions of gas need `repne' instead of `repnz'. */
|
||||
"repnz\n" /* Search for C in al. */
|
||||
"scasb\n"
|
||||
"movl %2,%0\n" /* Preload the not-found result: ecx is 0 whenever the
   scan ended without a match.  mov does not affect the Z flag.  */
|
||||
"jnz done\n" /* Jump if we found nothing equal to C. */
|
||||
"leal -1(%1),%0\n" /* edi has been incremented past the match. Return edi-1. */
|
||||
"done:" :
|
||||
/* Outputs: str and len are listed because the asm modifies edi/ecx. */
"=a" (retval), "=D" (str), "=c" (len) :
|
||||
/* Inputs are tied to the same registers as the outputs above. */
"0" (c), "1" (str), "2" (len));
|
||||
return retval;
|
||||
}
|
||||
|
||||
#else
|
||||
#include <sysdeps/generic/memchr.c>
|
||||
#endif
|
68
sysdeps/i386/memcmp.S
Normal file
68
sysdeps/i386/memcmp.S
Normal file
@ -0,0 +1,68 @@
|
||||
/* memcmp -- compare two memory blocks for differences in the first COUNT
|
||||
bytes.
|
||||
Copyright (C) 1995 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public
|
||||
License along with the GNU C Library; see the file COPYING.LIB. If
|
||||
not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include "asm-syntax.h"
|
||||
|
||||
/*
|
||||
INPUT PARAMETERS:
|
||||
block1 (sp + 4)
|
||||
block2 (sp + 8)
|
||||
len (sp + 12)
|
||||
*/
|
||||
|
||||
.text
/* int memcmp (const void *block1, const void *block2, size_t len)

   Compare the first LEN bytes of BLOCK1 and BLOCK2 as unsigned bytes.
   Returns 0 if they are all equal, -1 if the first differing byte of
   BLOCK1 is smaller than the one in BLOCK2, and 1 if it is larger.

   Register use: %esi = block1, %edi = block2, %ecx = remaining count,
   %edx = temporary home for the caller's %edi, %eax = result.  */
ENTRY (memcmp)
	pushl %esi		/* Save callee-saved register.  */
	movl %edi, %edx		/* %edx is not used otherwise, so it can
				   hold the caller's %edi.  This is
				   faster than a push/pop pair.  */

	/* Exactly one word (%esi) has been pushed, so the arguments
	   documented as sp+4, sp+8 and sp+12 now live 4 bytes higher.  */
	movl 8(%esp), %esi	/* Load address of block #1.  */
	movl 12(%esp), %edi	/* Load address of block #2.  */
	movl 16(%esp), %ecx	/* Load maximal length of compare area.  */

	cld			/* Set direction of comparison.  */

	xorl %eax, %eax		/* Default result.  This also sets the Z
				   flag, so a zero LEN (cmpsb is never
				   executed) takes the `jz' below.  */

	repe			/* Compare at most %ecx bytes.  */
	cmpsb
	jz L1			/* If even last byte was equal we return 0.  */

	/* The memory blocks are not equal.  The result of the last
	   subtraction is present in the carry flag.  It is set when
	   the byte in block #2 is bigger.  In this case we have to
	   return -1 (=0xffffffff), else 1.  */
	sbbl %eax, %eax		/* This is tricky.  %eax == 0 and carry is
				   set or not depending on last subtraction.  */

	/* At this point %eax == 0 if the byte of block #1 was bigger, and
	   0xffffffff if the byte of block #2 was bigger.  The latter
	   case is already correct but the former needs a little adjustment.
	   Note that the following operation does not change 0xffffffff.  */
	orb $1, %al		/* Change 0 to 1.  */

L1:	popl %esi		/* Restore registers.  */
	movl %edx, %edi

	ret

#undef bcmp
weak_alias (memcmp, bcmp)
|
87
sysdeps/i386/stpcpy.S
Normal file
87
sysdeps/i386/stpcpy.S
Normal file
@ -0,0 +1,87 @@
|
||||
/* stpcpy -- copy SRC to DEST returning the address of the terminating '\0'
|
||||
in DEST.
|
||||
For Intel 80x86, x>=3.
|
||||
Copyright (C) 1994, 1995 Free Software Foundation, Inc.
|
||||
Contributed by Ulrich Drepper (drepper@gnu.ai.mit.edu).
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public
|
||||
License along with the GNU C Library; see the file COPYING.LIB. If
|
||||
not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
/* This function is defined neither in ANSI nor POSIX standards but is
|
||||
also not invented here. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include "asm-syntax.h"
|
||||
|
||||
/*
|
||||
INPUT PARAMETERS:
|
||||
dest (sp + 4)
|
||||
src (sp + 8)
|
||||
*/
|
||||
|
||||
/* __stpcpy: copy the NUL-terminated string at src (8(%esp)) to dest
   (4(%esp)), including the NUL, and return in %eax the address of the
   NUL byte stored in dest.  %ecx and %edx are used as scratch; no
   register is saved and nothing is pushed.  */
.text
|
||||
ENTRY (__stpcpy)
|
||||
movl 4(%esp), %eax /* load destination pointer */
|
||||
movl 8(%esp), %ecx /* load source pointer */
|
||||
|
||||
subl %eax, %ecx /* magic: %ecx = src - dest, so (%eax,%ecx) is the
|
||||
source byte belonging to (%eax); one loop variable suffices */
|
||||
|
||||
/* Here we would like to write
|
||||
|
||||
subl $4, %eax
|
||||
ALIGN (4)
|
||||
|
||||
but the assembler is too smart and optimizes for the shortest
|
||||
form where the number only needs one byte. But if we could
|
||||
have the long form we would not need the alignment. */
|
||||
|
||||
.byte 0x81, 0xe8 /* This is `subl $0x00000004, %eax' */
|
||||
.long 0x00000004
|
||||
|
||||
/* Four times unfolded loop with only one loop counter. This
|
||||
is achieved by the use of index+base addressing mode. As the
|
||||
loop counter we use the destination address because this is
|
||||
also the result. */
|
||||
L1: addl $4, %eax /* increment loop counter (undoes the subl on entry) */
|
||||
|
||||
movb (%eax,%ecx), %dl /* load current char */
|
||||
movb %dl, (%eax) /* and store it */
|
||||
testb %dl, %dl /* was it NUL? */
|
||||
jz L2 /* yes, then exit */
|
||||
|
||||
movb 1(%eax,%ecx), %dl /* load current char */
|
||||
movb %dl, 1(%eax) /* and store it */
|
||||
testb %dl, %dl /* was it NUL? */
|
||||
jz L3 /* yes, then exit */
|
||||
|
||||
movb 2(%eax,%ecx), %dl /* load current char */
|
||||
movb %dl, 2(%eax) /* and store it */
|
||||
testb %dl, %dl /* was it NUL? */
|
||||
jz L4 /* yes, then exit */
|
||||
|
||||
movb 3(%eax,%ecx), %dl /* load current char */
|
||||
movb %dl, 3(%eax) /* and store it */
|
||||
testb %dl, %dl /* was it NUL? */
|
||||
jnz L1 /* no, then continue loop */
|
||||
|
||||
incl %eax /* NUL was byte 3: falls through all three increments */
|
||||
L4: incl %eax /* NUL was byte 2: two increments in total */
|
||||
L3: incl %eax /* NUL was byte 1: one increment */
|
||||
L2: /* %eax now addresses the NUL byte stored in dest */
|
||||
ret
|
||||
|
||||
weak_alias (__stpcpy, stpcpy)
|
143
sysdeps/i386/stpncpy.S
Normal file
143
sysdeps/i386/stpncpy.S
Normal file
@ -0,0 +1,143 @@
|
||||
/* stpncpy -- copy no more than N bytes from SRC to DEST, returning the
|
||||
address of the terminating '\0' in DEST.
|
||||
For Intel 80x86, x>=3.
|
||||
Copyright (C) 1994, 1995 Free Software Foundation, Inc.
|
||||
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>
|
||||
Some bug fixes by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
|
||||
- original wrote n+1 chars in some cases.
|
||||
- stpncpy() ought to behave like strncpy() ie. not null-terminate
|
||||
if limited by n. glibc-1.09 stpncpy() does this.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public
|
||||
License along with the GNU C Library; see the file COPYING.LIB. If
|
||||
not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include "asm-syntax.h"
|
||||
|
||||
/*
|
||||
INPUT PARAMETERS:
|
||||
dest (sp + 4)
|
||||
src (sp + 8)
|
||||
maxlen (sp + 12)
|
||||
*/
|
||||
|
||||
/* __stpncpy: copy at most maxlen bytes from src to dest.  If a NUL is
   copied before maxlen is exhausted, the rest of the maxlen bytes in
   dest is filled with NUL and %eax returns the address of the first NUL
   stored; otherwise %eax returns dest + maxlen and no NUL is stored.
   %eax = destination cursor, %esi = src - dest, %ecx = bytes still
   allowed, %dl = current character.  */
.text
|
||||
ENTRY (__stpncpy)
|
||||
|
||||
pushl %esi /* save callee-saved register; arguments move up by 4 */
|
||||
|
||||
movl 8(%esp), %eax /* load destination pointer */
|
||||
movl 12(%esp), %esi /* load source pointer */
|
||||
movl 16(%esp), %ecx /* load maximal length */
|
||||
|
||||
subl %eax, %esi /* magic: %esi = src - dest, so (%eax,%esi) is the
|
||||
source byte belonging to destination byte (%eax) */
|
||||
jmp L1 /* jump to loop "head" */
|
||||
|
||||
ALIGN(4)
|
||||
|
||||
/* Four times unfolded loop with two loop counters. We get
|
||||
the third value (the source address) by using the index+base
|
||||
addressing mode. */
|
||||
L2: movb (%eax,%esi), %dl /* load current char */
|
||||
movb %dl, (%eax) /* and store it */
|
||||
testb %dl, %dl /* was it NUL? */
|
||||
jz L7 /* yes, then exit */
|
||||
|
||||
movb 1(%eax,%esi), %dl /* load current char */
|
||||
movb %dl, 1(%eax) /* and store it */
|
||||
testb %dl, %dl /* was it NUL? */
|
||||
jz L6 /* yes, then exit */
|
||||
|
||||
movb 2(%eax,%esi), %dl /* load current char */
|
||||
movb %dl, 2(%eax) /* and store it */
|
||||
testb %dl, %dl /* was it NUL? */
|
||||
jz L5 /* yes, then exit */
|
||||
|
||||
movb 3(%eax,%esi), %dl /* load current char */
|
||||
movb %dl, 3(%eax) /* and store it */
|
||||
testb %dl, %dl /* was it NUL? */
|
||||
jz L4 /* yes, then exit */
|
||||
|
||||
addl $4, %eax /* increment loop counter for full round */
|
||||
|
||||
L1: subl $4, %ecx /* at least 4 bytes still allowed? */
|
||||
jae L2 /* yes, then go to start of loop */
|
||||
|
||||
/* The at most 3 remaining bytes are not processed in a loop. */
|
||||
|
||||
addl $4, %ecx /* correct above subtraction */
|
||||
jz L9 /* maximal allowed char reached => go to end */
|
||||
|
||||
movb (%eax,%esi), %dl /* load current char */
|
||||
movb %dl, (%eax) /* and store it */
|
||||
testb %dl, %dl /* was it NUL? */
|
||||
jz L3 /* yes, then exit */
|
||||
|
||||
incl %eax /* increment pointer */
|
||||
decl %ecx /* decrement length counter */
|
||||
jz L9 /* no more allowed => exit */
|
||||
|
||||
movb (%eax,%esi), %dl /* load current char */
|
||||
movb %dl, (%eax) /* and store it */
|
||||
testb %dl, %dl /* was it NUL? */
|
||||
jz L3 /* yes, then exit */
|
||||
|
||||
incl %eax /* increment pointer */
|
||||
decl %ecx /* decrement length counter */
|
||||
jz L9 /* no more allowed => exit */
|
||||
|
||||
movb (%eax,%esi), %dl /* load current char */
|
||||
movb %dl, (%eax) /* and store it */
|
||||
testb %dl, %dl /* was it NUL? */
|
||||
jz L3 /* yes, then exit */
|
||||
|
||||
incl %eax /* increment pointer */
|
||||
jmp L9 /* we don't have to test for counter underflow
|
||||
because we know we had at most 3 bytes
|
||||
remaining => exit */
|
||||
|
||||
/* When coming from the main loop we have to adjust pointer and
   counter for the bytes already copied in this round.  The labels
   fall through: L4 (NUL was byte 3) adjusts three times, L5 twice,
   L6 once, L7 (NUL was byte 0) not at all.  */
|
||||
L4: decl %ecx /* decrement counter */
|
||||
incl %eax /* increment pointer */
|
||||
|
||||
L5: decl %ecx /* decrement counter */
|
||||
incl %eax /* increment pointer */
|
||||
|
||||
L6: decl %ecx /* decrement counter */
|
||||
incl %eax /* increment pointer */
|
||||
L7:
|
||||
|
||||
addl $3, %ecx /* correct pre-decrementation of counter
|
||||
at the beginning of the loop; but why 3
|
||||
and not 4? Very simple, we have to count
|
||||
the NUL char we already wrote. */
|
||||
jz L9 /* counter is also 0 => exit */
|
||||
|
||||
/* We now have to fill the rest of the buffer with NUL. This
|
||||
is done in a tricky way. Please note that the addressing mode
|
||||
used below is not the same we used above. Here we use the
|
||||
%ecx register: bytes are cleared from (%eax,%ecx) downwards while
   %eax stays on the first NUL, which is the return value. */
|
||||
L8:
|
||||
movb $0, (%ecx,%eax) /* store NUL char */
|
||||
L3: decl %ecx /* all bytes written? (the tail code enters here with
   %ecx still counting the NUL it just stored) */
|
||||
jnz L8 /* no, then again */
|
||||
|
||||
L9: popl %esi /* restore saved register content */
|
||||
|
||||
ret
|
||||
|
||||
weak_alias (__stpncpy, stpncpy)
|
278
sysdeps/i386/strchr.S
Normal file
278
sysdeps/i386/strchr.S
Normal file
@ -0,0 +1,278 @@
|
||||
/* strchr (str, ch) -- Return pointer to first occurrence of CH in STR.
|
||||
For Intel 80x86, x>=3.
|
||||
Copyright (C) 1994, 1995 Free Software Foundation, Inc.
|
||||
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>
|
||||
Some optimisations by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public
|
||||
License along with the GNU C Library; see the file COPYING.LIB. If
|
||||
not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include "asm-syntax.h"
|
||||
|
||||
/*
|
||||
INPUT PARAMETERS:
|
||||
str (sp + 4)
|
||||
ch (sp + 8)
|
||||
*/
|
||||
|
||||
.text
/* char *strchr (const char *str, int ch)

   Return in %eax a pointer to the first byte of the NUL-terminated
   string STR that equals (the low byte of) CH, or NULL if the string
   ends first.  Searching for NUL itself returns the terminator.

   Register use: %eax = current pointer, %edx = CH replicated into all
   four bytes, %ecx = current dword (XORed with %edx while probing for
   CH), %edi = scratch for the magic-bits test (saved and restored).  */
ENTRY (strchr)
	pushl %edi		/* Save callee-saved register used here.  */

	movl 8(%esp), %eax	/* get string pointer */
	movl 12(%esp), %edx	/* get character we are looking for */

	/* At the moment %edx contains C.  What we need for the
	   algorithm is C in all bytes of the dword.  Avoid
	   operations on 16 bit words because these require a
	   prefix byte (and one more cycle).  */
	movb %dl, %dh		/* low word is now c|c */
	movl %edx, %ecx
	shll $16, %edx		/* now it is c|c|0|0 */
	movw %cx, %dx		/* and finally c|c|c|c */

	/* Before we start with the main loop we process single bytes
	   until the source pointer is aligned.  This has two reasons:
	   1. aligned 32-bit memory access is faster
	   and (more important)
	   2. we process in the main loop 32 bit in one step although
	      we don't know the end of the string.  But accessing at
	      4-byte alignment guarantees that we never access illegal
	      memory if this would not also be done by the trivial
	      implementation (this is because all processor inherent
	      boundaries are multiples of 4).

	   The alignment test only needs the two low address bits, so it
	   is done on %al: `testb' takes byte operands, and pairing it
	   with the 32-bit %eax is an operand-size mismatch.  */

	testb $3, %al		/* correctly aligned ? */
	jz L11			/* yes => begin loop */
	movb (%eax), %cl	/* load byte in question (we need it twice) */
	cmpb %cl, %dl		/* compare byte */
	je L6			/* target found => return */
	testb %cl, %cl		/* is NUL? */
	jz L2			/* yes => return NULL */
	incl %eax		/* increment pointer */

	testb $3, %al		/* correctly aligned ? */
	jz L11			/* yes => begin loop */
	movb (%eax), %cl	/* load byte in question (we need it twice) */
	cmpb %cl, %dl		/* compare byte */
	je L6			/* target found => return */
	testb %cl, %cl		/* is NUL? */
	jz L2			/* yes => return NULL */
	incl %eax		/* increment pointer */

	testb $3, %al		/* correctly aligned ? */
	jz L11			/* yes => begin loop */
	movb (%eax), %cl	/* load byte in question (we need it twice) */
	cmpb %cl, %dl		/* compare byte */
	je L6			/* target found => return */
	testb %cl, %cl		/* is NUL? */
	jz L2			/* yes => return NULL */
	incl %eax		/* increment pointer */

	/* Now we have reached alignment.  */
	jmp L11			/* begin loop */

	/* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
	   change any of the hole bits of LONGWORD.

	   1) Is this safe?  Will it catch all the zero bytes?
	   Suppose there is a byte with all zeros.  Any carry bits
	   propagating from its left will fall into the hole at its
	   least significant bit and stop.  Since there will be no
	   carry from its most significant bit, the LSB of the
	   byte to the left will be unchanged, and the zero will be
	   detected.

	   2) Is this worthwhile?  Will it ignore everything except
	   zero bytes?  Suppose every byte of LONGWORD has a bit set
	   somewhere.  There will be a carry into bit 8.  If bit 8
	   is set, this will carry into bit 16.  If bit 8 is clear,
	   one of bits 9-15 must be set, so there will be a carry
	   into bit 16.  Similarly, there will be a carry into bit
	   24.  If one of bits 24-31 is set, there will be a carry
	   into bit 32 (=carry flag), so all of the hole bits will
	   be changed.

	   3) But wait!  Aren't we looking for C, not zero?
	   Good point.  So what we do is XOR LONGWORD with a longword,
	   each of whose bytes is C.  This turns each byte that is C
	   into a zero.  */

	/* Each round the main loop processes 16 bytes.  */

	ALIGN(4)

L1:	addl $16, %eax		/* adjust pointer for whole round */

L11:	movl (%eax), %ecx	/* get word (= 4 bytes) in question */
	xorl %edx, %ecx		/* XOR with word c|c|c|c => bytes of str == c
				   are now 0 */
	movl $0xfefefeff, %edi	/* magic value */
	addl %ecx, %edi		/* add the magic value to the word.  We get
				   carry bits reported for each byte which
				   is *not* C */

	/* According to the algorithm we had to reverse the effect of the
	   XOR first and then test the overflow bits.  But because the
	   following XOR would destroy the carry flag and it would (in a
	   representation with more than 32 bits) not alter the last
	   overflow, we can now test this condition.  If no carry is signaled
	   no overflow must have occurred in the last byte => it was 0.  */
	jnc L7

	/* We are only interested in carry bits that change due to the
	   previous add, so remove original bits */
	xorl %ecx, %edi		/* ((word^charmask)+magic)^(word^charmask) */

	/* Now test for the other three overflow bits.  */
	orl $0xfefefeff, %edi	/* set all non-carry bits */
	incl %edi		/* add 1: if one carry bit was *not* set
				   the addition will not result in 0.  */

	/* If at least one byte of the word is C we don't get 0 in %edi.  */
	jnz L7			/* found it => return pointer */

	/* Now we made sure the dword does not contain the character we are
	   looking for.  But because we deal with strings we have to check
	   for the end of string before testing the next dword.  */

	xorl %edx, %ecx		/* restore original dword without reload */
	movl $0xfefefeff, %edi	/* magic value */
	addl %ecx, %edi		/* add the magic value to the word.  We get
				   carry bits reported for each byte which
				   is *not* 0 */
	jnc L2			/* highest byte is NUL => return NULL */
	xorl %ecx, %edi		/* (word+magic)^word */
	orl $0xfefefeff, %edi	/* set all non-carry bits */
	incl %edi		/* add 1: if one carry bit was *not* set
				   the addition will not result in 0.  */
	jnz L2			/* found NUL => return NULL */

	/* Second dword of the round: same two probes at offset 4.  */
	movl 4(%eax), %ecx	/* get word (= 4 bytes) in question */
	xorl %edx, %ecx		/* bytes of str == c are now 0 */
	movl $0xfefefeff, %edi	/* magic value */
	addl %ecx, %edi		/* carry reported for each byte *not* C */
	jnc L71			/* highest byte is C => return pointer */
	xorl %ecx, %edi		/* ((word^charmask)+magic)^(word^charmask) */
	orl $0xfefefeff, %edi	/* set all non-carry bits */
	incl %edi		/* non-zero if a carry bit was missing */
	jnz L71			/* found it => return pointer */
	xorl %edx, %ecx		/* restore original dword without reload */
	movl $0xfefefeff, %edi	/* magic value */
	addl %ecx, %edi		/* carry reported for each byte *not* 0 */
	jnc L2			/* highest byte is NUL => return NULL */
	xorl %ecx, %edi		/* (word+magic)^word */
	orl $0xfefefeff, %edi	/* set all non-carry bits */
	incl %edi		/* non-zero if a carry bit was missing */
	jnz L2			/* found NUL => return NULL */

	/* Third dword of the round: offset 8.  */
	movl 8(%eax), %ecx	/* get word (= 4 bytes) in question */
	xorl %edx, %ecx		/* bytes of str == c are now 0 */
	movl $0xfefefeff, %edi	/* magic value */
	addl %ecx, %edi		/* carry reported for each byte *not* C */
	jnc L72			/* highest byte is C => return pointer */
	xorl %ecx, %edi		/* ((word^charmask)+magic)^(word^charmask) */
	orl $0xfefefeff, %edi	/* set all non-carry bits */
	incl %edi		/* non-zero if a carry bit was missing */
	jnz L72			/* found it => return pointer */
	xorl %edx, %ecx		/* restore original dword without reload */
	movl $0xfefefeff, %edi	/* magic value */
	addl %ecx, %edi		/* carry reported for each byte *not* 0 */
	jnc L2			/* highest byte is NUL => return NULL */
	xorl %ecx, %edi		/* (word+magic)^word */
	orl $0xfefefeff, %edi	/* set all non-carry bits */
	incl %edi		/* non-zero if a carry bit was missing */
	jnz L2			/* found NUL => return NULL */

	/* Fourth dword of the round: offset 12.  */
	movl 12(%eax), %ecx	/* get word (= 4 bytes) in question */
	xorl %edx, %ecx		/* bytes of str == c are now 0 */
	movl $0xfefefeff, %edi	/* magic value */
	addl %ecx, %edi		/* carry reported for each byte *not* C */
	jnc L73			/* highest byte is C => return pointer */
	xorl %ecx, %edi		/* ((word^charmask)+magic)^(word^charmask) */
	orl $0xfefefeff, %edi	/* set all non-carry bits */
	incl %edi		/* non-zero if a carry bit was missing */
	jnz L73			/* found it => return pointer */
	xorl %edx, %ecx		/* restore original dword without reload */
	movl $0xfefefeff, %edi	/* magic value */
	addl %ecx, %edi		/* carry reported for each byte *not* 0 */
	jnc L2			/* highest byte is NUL => return NULL */
	xorl %ecx, %edi		/* (word+magic)^word */
	orl $0xfefefeff, %edi	/* set all non-carry bits */
	incl %edi		/* non-zero if a carry bit was missing */
	jz L1			/* no NUL found => restart loop */

L2:	/* Return NULL.  */
	xorl %eax, %eax		/* load NULL in return value register */
	popl %edi		/* restore saved register content */
	ret

	/* The match was found at offset 12, 8 or 4 of the round; the
	   labels fall through so that %eax addresses the matching dword.  */
L73:	addl $4, %eax		/* adjust pointer */
L72:	addl $4, %eax
L71:	addl $4, %eax

	/* We now scan for the byte in which the character was matched.
	   But we have to take care of the case that a NUL char is
	   found before this in the dword.  %ecx still holds the dword
	   XORed with c|c|c|c, so a byte equal to C reads as 0 and an
	   original NUL byte reads as C (= %dl).  */

L7:	testb %cl, %cl		/* is first byte C? */
	jz L6			/* yes => return pointer */
	cmpb %dl, %cl		/* is first byte NUL? */
	je L2			/* yes => return NULL */
	incl %eax		/* it's not in the first byte */

	testb %ch, %ch		/* is second byte C? */
	jz L6			/* yes => return pointer */
	cmpb %dl, %ch		/* is second byte NUL? */
	je L2			/* yes => return NULL */
	incl %eax		/* it's not in the second byte */

	shrl $16, %ecx		/* make upper bytes accessible */
	testb %cl, %cl		/* is third byte C? */
	jz L6			/* yes => return pointer */
	cmpb %dl, %cl		/* is third byte NUL? */
	je L2			/* yes => return NULL */

	/* It must be in the fourth byte and it cannot be NUL.  */
	incl %eax

L6:	popl %edi		/* restore saved register content */

	ret

weak_alias (strchr, index)
|
176
sysdeps/i386/strcspn.S
Normal file
176
sysdeps/i386/strcspn.S
Normal file
@ -0,0 +1,176 @@
|
||||
/* strcspn (str, ss) -- Return the length of the initial segment of STR
|
||||
which contains no characters from SS.
|
||||
For Intel 80x86, x>=3.
|
||||
Copyright (C) 1994, 1995 Free Software Foundation, Inc.
|
||||
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>
|
||||
Bug fixes by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public
|
||||
License along with the GNU C Library; see the file COPYING.LIB. If
|
||||
not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include "asm-syntax.h"
|
||||
|
||||
/*
|
||||
INPUT PARAMETERS:
|
||||
str (sp + 4)
|
||||
stopset (sp + 8)
|
||||
*/
|
||||
|
||||
.text
|
||||
ENTRY (strcspn)
|
||||
movl 4(%esp), %edx /* get string pointer */
|
||||
movl 8(%esp), %eax /* get stopset pointer */
|
||||
|
||||
/* First we create a table with flags for all possible characters.
|
||||
For the ASCII (7bit/8bit) or ISO-8859-X character sets which are
|
||||
supported by the C string functions we have 256 characters.
|
||||
Before inserting marks for the stop characters we clear the whole
|
||||
table. The unrolled form is much faster than a loop. */
|
||||
xorl %ecx, %ecx /* %ecx = 0 !!! */
|
||||
|
||||
pushl %ecx /* make a 256 bytes long block filled with 0 */
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl $0 /* These immediate values make the label 2 */
|
||||
pushl $0 /* to be aligned on a 16 byte boundary to */
|
||||
pushl $0 /* get a better performance of the loop. */
|
||||
pushl $0
|
||||
pushl $0
|
||||
pushl $0
|
||||
|
||||
/* For understanding the following code remember that %ecx == 0 now.
|
||||
Although all the following instruction only modify %cl we always
|
||||
have a correct zero-extended 32-bit value in %ecx. */
|
||||
|
||||
/* Don't change the "testb $0xff,%%cl" to "testb %%cl,%%cl". We want
|
||||
longer instructions so that the next loop aligns without adding nops. */
|
||||
|
||||
L2: movb (%eax), %cl /* get byte from stopset */
|
||||
testb %cl, %cl /* is NUL char? */
|
||||
jz L1 /* yes => start compare loop */
|
||||
movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */
|
||||
|
||||
movb 1(%eax), %cl /* get byte from stopset */
|
||||
testb $0xff, %cl /* is NUL char? */
|
||||
jz L1 /* yes => start compare loop */
|
||||
movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */
|
||||
|
||||
movb 2(%eax), %cl /* get byte from stopset */
|
||||
testb $0xff, %cl /* is NUL char? */
|
||||
jz L1 /* yes => start compare loop */
|
||||
movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */
|
||||
|
||||
movb 3(%eax), %cl /* get byte from stopset */
|
||||
addl $4, %eax /* increment stopset pointer */
|
||||
movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */
|
||||
testb $0xff, %cl /* is NUL char? */
|
||||
jnz L2 /* no => process next dword from stopset */
|
||||
|
||||
L1: leal -4(%edx), %eax /* prepare loop */
|
||||
|
||||
/* We use a neat trick for the following loop. Normally we would
|
||||
have to test for two termination conditions
|
||||
1. a character in the stopset was found
|
||||
and
|
||||
2. the end of the string was found
|
||||
But as a sign that the chracter is in the stopset we store its
|
||||
value in the table. But the value of NUL is NUL so the loop
|
||||
terminates for NUL in every case. */
|
||||
|
||||
L3: addl $4, %eax /* adjust pointer for full loop round */
|
||||
|
||||
movb (%eax), %cl /* get byte from string */
|
||||
cmpb %cl, (%esp,%ecx) /* is it contained in stopset? */
|
||||
je L4 /* yes => return */
|
||||
|
||||
movb 1(%eax), %cl /* get byte from string */
|
||||
cmpb %cl, (%esp,%ecx) /* is it contained in stopset? */
|
||||
je L5 /* yes => return */
|
||||
|
||||
movb 2(%eax), %cl /* get byte from string */
|
||||
cmpb %cl, (%esp,%ecx) /* is it contained in stopset? */
|
||||
je L6 /* yes => return */
|
||||
|
||||
movb 3(%eax), %cl /* get byte from string */
|
||||
cmpb %cl, (%esp,%ecx) /* is it contained in stopset? */
|
||||
jne L3 /* yes => return */
|
||||
|
||||
incl %eax /* adjust pointer */
|
||||
L6: incl %eax
|
||||
L5: incl %eax
|
||||
|
||||
L4: subl %edx, %eax /* we have to return the number of valid
|
||||
characters, so compute distance to first
|
||||
non-valid character */
|
||||
addl $256, %esp /* remove stopset */
|
||||
|
||||
ret
|
177
sysdeps/i386/strpbrk.S
Normal file
177
sysdeps/i386/strpbrk.S
Normal file
@ -0,0 +1,177 @@
|
||||
/* strpbrk (str, ss) -- Return a pointer to the first character in STR
|
||||
which is contained in SS, or NULL if there is no such character.
|
||||
For Intel 80x86, x>=3.
|
||||
Copyright (C) 1994, 1995 Free Software Foundation, Inc.
|
||||
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>
|
||||
Bug fixes by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public
|
||||
License along with the GNU C Library; see the file COPYING.LIB. If
|
||||
not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include "asm-syntax.h"
|
||||
|
||||
/*
|
||||
INPUT PARAMETERS:
|
||||
str (sp + 4)
|
||||
stopset (sp + 8)
|
||||
*/
|
||||
|
||||
.text
|
||||
ENTRY (strpbrk)
|
||||
movl 4(%esp), %edx /* get string pointer */
|
||||
movl 8(%esp), %eax /* get stopset pointer */
|
||||
|
||||
/* First we create a table with flags for all possible characters.
|
||||
For the ASCII (7bit/8bit) or ISO-8859-X character sets which are
|
||||
supported by the C string functions we have 256 characters.
|
||||
Before inserting marks for the stop characters we clear the whole
|
||||
table. The unrolled form is much faster than a loop. */
|
||||
xorl %ecx, %ecx /* %ecx = 0 !!! */
|
||||
|
||||
pushl %ecx /* make a 256 bytes long block filled with 0 */
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl $0 /* These immediate values make the label 2 */
|
||||
pushl $0 /* to be aligned on a 16 byte boundary to */
|
||||
pushl $0 /* get a better performance of the loop. */
|
||||
pushl $0
|
||||
pushl $0
|
||||
pushl $0
|
||||
|
||||
/* For understanding the following code remember that %ecx == 0 now.
|
||||
Although all the following instruction only modify %cl we always
|
||||
have a correct zero-extended 32-bit value in %ecx. */
|
||||
|
||||
/* Don't change the "testb $0xff,%%cl" to "testb %%cl,%%cl". We want
|
||||
longer instructions so that the next loop aligns without adding nops. */
|
||||
|
||||
L2: movb (%eax), %cl /* get byte from stopset */
|
||||
testb %cl, %cl /* is NUL char? */
|
||||
jz L1 /* yes => start compare loop */
|
||||
movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */
|
||||
|
||||
movb 1(%eax), %cl /* get byte from stopset */
|
||||
testb $0xff, %cl /* is NUL char? */
|
||||
jz L1 /* yes => start compare loop */
|
||||
movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */
|
||||
|
||||
movb 2(%eax), %cl /* get byte from stopset */
|
||||
testb $0xff, %cl /* is NUL char? */
|
||||
jz L1 /* yes => start compare loop */
|
||||
movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */
|
||||
|
||||
movb 3(%eax), %cl /* get byte from stopset */
|
||||
addl $4, %eax /* increment stopset pointer */
|
||||
movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */
|
||||
testb $0xff, %cl /* is NUL char? */
|
||||
jnz L2 /* no => process next dword from stopset */
|
||||
|
||||
L1: leal -4(%edx), %eax /* prepare loop */
|
||||
|
||||
/* We use a neat trick for the following loop. Normally we would
|
||||
have to test for two termination conditions
|
||||
1. a character in the stopset was found
|
||||
and
|
||||
2. the end of the string was found
|
||||
But as a sign that the chracter is in the stopset we store its
|
||||
value in the table. But the value of NUL is NUL so the loop
|
||||
terminates for NUL in every case. */
|
||||
|
||||
L3: addl $4, %eax /* adjust pointer for full loop round */
|
||||
|
||||
movb (%eax), %cl /* get byte from string */
|
||||
cmpb %cl, (%esp,%ecx) /* is it contained in stopset? */
|
||||
je L4 /* yes => return */
|
||||
|
||||
movb 1(%eax), %cl /* get byte from string */
|
||||
cmpb %cl, (%esp,%ecx) /* is it contained in stopset? */
|
||||
je L5 /* yes => return */
|
||||
|
||||
movb 2(%eax), %cl /* get byte from string */
|
||||
cmpb %cl, (%esp,%ecx) /* is it contained in stopset? */
|
||||
je L6 /* yes => return */
|
||||
|
||||
movb 3(%eax), %cl /* get byte from string */
|
||||
cmpb %cl, (%esp,%ecx) /* is it contained in stopset? */
|
||||
jne L3 /* yes => return */
|
||||
|
||||
incl %eax /* adjust pointer */
|
||||
L6: incl %eax
|
||||
L5: incl %eax
|
||||
|
||||
L4: addl $256, %esp /* remove stopset */
|
||||
|
||||
orb %cl, %cl /* was last character NUL? */
|
||||
jnz L7 /* no => return pointer */
|
||||
xorl %eax, %eax /* return NULL */
|
||||
|
||||
L7: ret
|
321
sysdeps/i386/strrchr.S
Normal file
321
sysdeps/i386/strrchr.S
Normal file
@ -0,0 +1,321 @@
|
||||
/* strrchr (str, ch) -- Return pointer to last occurrence of CH in STR.
|
||||
For Intel 80x86, x>=3.
|
||||
Copyright (C) 1994, 1995 Free Software Foundation, Inc.
|
||||
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>
|
||||
Some optimisations by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public
|
||||
License along with the GNU C Library; see the file COPYING.LIB. If
|
||||
not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include "asm-syntax.h"
|
||||
|
||||
/*
|
||||
INPUT PARAMETERS:
|
||||
str (sp + 4)
|
||||
ch (sp + 8)
|
||||
*/
|
||||
|
||||
.text
|
||||
ENTRY (strrchr)
	/* strrchr (str, ch): return in %eax a pointer to the last occurrence
	   of CH in STR, or NULL if CH does not occur.  If CH is NUL the
	   result points at the terminating NUL.

	   Register use:
	     %esi  scan pointer
	     %ecx  CH replicated into all four bytes
	     %edx  byte or dword currently examined
	     %edi  scratch for the carry-bit trick
	     %eax  best match found so far (NULL until one is found)  */

	pushl %edi		/* Save callee-saved registers used here.  */
	pushl %esi

	xorl %eax, %eax		/* no match yet => result is NULL */
	movl 12(%esp), %esi	/* get string pointer */
	movl 16(%esp), %ecx	/* get character we are looking for */

	/* At the moment %ecx contains C.  What we need for the
	   algorithm is C in all bytes of the dword.  Avoid
	   operations on 16 bit words because these require a
	   prefix byte (and one more cycle).  */
	movb %cl, %ch		/* now it is 0|0|c|c */
	movl %ecx, %edx
	shll $16, %ecx		/* now it is c|c|0|0 */
	movw %dx, %cx		/* and finally c|c|c|c */

	/* Before we start with the main loop we process single bytes
	   until the source pointer is aligned.  This has two reasons:
	   1. aligned 32-bit memory access is faster
	   and (more important)
	   2. we process in the main loop 32 bit in one step although
	      we don't know the end of the string.  But accessing at
	      4-byte alignment guarantees that we never access illegal
	      memory if this would not also be done by the trivial
	      implementation (this is because all processor inherent
	      boundaries are multiples of 4).  */

	/* The alignment test must use a 32-bit operation: %esi has no
	   byte-sized form, so `testb $3, %esi' is an operand-size mismatch.  */
	testl $3, %esi		/* correctly aligned ? */
	jz L19			/* yes => begin loop */
	movb (%esi), %dl	/* load byte in question (we need it twice) */
	cmpb %dl, %cl		/* compare byte */
	jne L11			/* no match => do not record it */
	movl %esi, %eax		/* remember pointer as possible result */
L11:	orb %dl, %dl		/* is NUL? */
	jz L2			/* yes => return result found so far */
	incl %esi		/* increment pointer */

	testl $3, %esi		/* correctly aligned ? */
	jz L19			/* yes => begin loop */
	movb (%esi), %dl	/* load byte in question (we need it twice) */
	cmpb %dl, %cl		/* compare byte */
	jne L12			/* no match => do not record it */
	movl %esi, %eax		/* remember pointer as result */
L12:	orb %dl, %dl		/* is NUL? */
	jz L2			/* yes => return result found so far */
	incl %esi		/* increment pointer */

	testl $3, %esi		/* correctly aligned ? */
	jz L19			/* yes => begin loop */
	movb (%esi), %dl	/* load byte in question (we need it twice) */
	cmpb %dl, %cl		/* compare byte */
	jne L13			/* no match => do not record it */
	movl %esi, %eax		/* remember pointer as result */
	/* The end-of-string test has to look at the byte just loaded (%dl),
	   not at the search character (%cl).  Testing %cl would run past a
	   terminator here when C != NUL and would stop early when C == NUL.  */
L13:	orb %dl, %dl		/* is NUL? */
	jz L2			/* yes => return result found so far */
	incl %esi		/* increment pointer */

	/* Now we have reached alignment.  */
	jmp L19			/* begin loop */

	/* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
	   change any of the hole bits of LONGWORD.

	   1) Is this safe?  Will it catch all the zero bytes?
	   Suppose there is a byte with all zeros.  Any carry bits
	   propagating from its left will fall into the hole at its
	   least significant bit and stop.  Since there will be no
	   carry from its most significant bit, the LSB of the
	   byte to the left will be unchanged, and the zero will be
	   detected.

	   2) Is this worthwhile?  Will it ignore everything except
	   zero bytes?  Suppose every byte of LONGWORD has a bit set
	   somewhere.  There will be a carry into bit 8.  If bit 8
	   is set, this will carry into bit 16.  If bit 8 is clear,
	   one of bits 9-15 must be set, so there will be a carry
	   into bit 16.  Similarly, there will be a carry into bit
	   24.  If one of bits 24-31 is set, there will be a carry
	   into bit 32 (=carry flag), so all of the hole bits will
	   be changed.

	   3) But wait!  Aren't we looking for C, not zero?
	   Good point.  So what we do is XOR LONGWORD with a longword,
	   each of whose bytes is C.  This turns each byte that is C
	   into a zero.  */

	/* Each round the main loop processes 16 bytes.  */

	/* Jump to here when the character is detected.  We chose this
	   way around because the character one is looking for is not
	   as frequent as the rest and taking a conditional jump is more
	   expensive than ignoring it.

	   Some more words to the code below: it might not be obvious why
	   we decrement the source pointer here.  In the loop the pointer
	   is not pre-incremented and so it still points before the word
	   we are looking at.  But you should take a look at the instruction
	   which gets executed before we get into the loop: `addl $16, %esi'.
	   This makes the following subs into adds.  */

	/* These fill bytes make the main loop be correctly aligned.
	   We cannot use align because it is not the following instruction
	   which should be aligned.
	   NOTE(review): the fill count was presumably tuned for the original
	   instruction encodings; alignment only affects speed, but re-check
	   it if loop alignment matters.  */
	.byte 0, 0, 0, 0, 0, 0, 0, 0

	/* Entry points for a word in which C was detected.  At this point
	   %edx holds the word XORed with c|c|c|c, so a zero byte in %edx
	   marks a byte equal to C.  Only the highest matching byte of the
	   word is recorded because we want the last occurrence.  */
L4:	subl $4, %esi		/* adjust pointer */
L41:	subl $4, %esi
L42:	subl $4, %esi
L43:	testl $0xff000000, %edx	/* is highest byte == C? */
	jnz L33			/* no => try other bytes */
	leal 15(%esi), %eax	/* store address as result */
	jmp L1			/* and start loop again */

L3:	subl $4, %esi		/* adjust pointer */
L31:	subl $4, %esi
L32:	subl $4, %esi
L33:	testl $0xff0000, %edx	/* is C in third byte? */
	jnz L51			/* no => try other bytes */
	leal 14(%esi), %eax	/* store address as result */
	jmp L1			/* and start loop again */

L51:
	/* At this point we know that the byte is in one of the lower bytes.
	   We make a guess and correct it if necessary.  This reduces the
	   number of necessary jumps.  */
	leal 12(%esi), %eax	/* guess address of lowest byte as result */
	testb %dh, %dh		/* second byte != C, i.e. is guess correct? */
	jnz L1			/* yes => start loop */
	leal 13(%esi), %eax	/* correct guess to second byte */

L1:	addl $16, %esi		/* increment pointer for full round */

L19:	movl (%esi), %edx	/* get word (= 4 bytes) in question */
	movl $0xfefefeff, %edi	/* magic value */
	addl %edx, %edi		/* add the magic value to the word.  We get
				   carry bits reported for each byte which
				   is *not* 0 */

	/* According to the algorithm we had to reverse the effect of the
	   XOR first and then test the overflow bits.  But because the
	   following XOR would destroy the carry flag and it would (in a
	   representation with more than 32 bits) not alter the last
	   overflow, we can now test this condition.  If no carry is signaled
	   no overflow must have occurred in the last byte => it was 0.  */

	jnc L20			/* found NUL => check last word */

	/* We are only interested in carry bits that change due to the
	   previous add, so remove original bits */
	xorl %edx, %edi		/* (word+magic)^word */

	/* Now test for the other three overflow bits.  */
	orl $0xfefefeff, %edi	/* set all non-carry bits */
	incl %edi		/* add 1: if one carry bit was *not* set
				   the addition will not result in 0.  */

	/* If at least one byte of the word is NUL we don't get 0 in %edi.  */
	jnz L20			/* found NUL => check last word */

	/* Now we made sure the dword does not contain a NUL byte, so the
	   string continues past it.  What remains is to test whether the
	   dword contains the character we are looking for.  */

	xorl %ecx, %edx		/* XOR with word c|c|c|c => bytes of str == c
				   are now 0 */
	movl $0xfefefeff, %edi	/* magic value */
	addl %edx, %edi		/* add the magic value to the word.  We get
				   carry bits reported for each byte which
				   is *not* 0 */
	jnc L4			/* highest byte is C => examine dword */
	xorl %edx, %edi		/* ((word^charmask)+magic)^(word^charmask) */
	orl $0xfefefeff, %edi	/* set all non-carry bits */
	incl %edi		/* add 1: if one carry bit was *not* set
				   the addition will not result in 0.  */
	jnz L3			/* C is detected in the word => examine it */

	movl 4(%esi), %edx	/* get word (= 4 bytes) in question */
	movl $0xfefefeff, %edi	/* magic value */
	addl %edx, %edi		/* add the magic value to the word.  We get
				   carry bits reported for each byte which
				   is *not* 0 */
	jnc L21			/* found NUL => check last word */
	xorl %edx, %edi		/* (word+magic)^word */
	orl $0xfefefeff, %edi	/* set all non-carry bits */
	incl %edi		/* add 1: if one carry bit was *not* set
				   the addition will not result in 0.  */
	jnz L21			/* found NUL => check last word */
	xorl %ecx, %edx		/* XOR with word c|c|c|c => bytes of str == c
				   are now 0 */
	movl $0xfefefeff, %edi	/* magic value */
	addl %edx, %edi		/* add the magic value to the word.  We get
				   carry bits reported for each byte which
				   is *not* 0 */
	jnc L41			/* highest byte is C => examine dword */
	xorl %edx, %edi		/* ((word^charmask)+magic)^(word^charmask) */
	orl $0xfefefeff, %edi	/* set all non-carry bits */
	incl %edi		/* add 1: if one carry bit was *not* set
				   the addition will not result in 0.  */
	jnz L31			/* C is detected in the word => examine it */

	movl 8(%esi), %edx	/* get word (= 4 bytes) in question */
	movl $0xfefefeff, %edi	/* magic value */
	addl %edx, %edi		/* add the magic value to the word.  We get
				   carry bits reported for each byte which
				   is *not* 0 */
	jnc L22			/* found NUL => check last word */
	xorl %edx, %edi		/* (word+magic)^word */
	orl $0xfefefeff, %edi	/* set all non-carry bits */
	incl %edi		/* add 1: if one carry bit was *not* set
				   the addition will not result in 0.  */
	jnz L22			/* found NUL => check last word */
	xorl %ecx, %edx		/* XOR with word c|c|c|c => bytes of str == c
				   are now 0 */
	movl $0xfefefeff, %edi	/* magic value */
	addl %edx, %edi		/* add the magic value to the word.  We get
				   carry bits reported for each byte which
				   is *not* 0 */
	jnc L42			/* highest byte is C => examine dword */
	xorl %edx, %edi		/* ((word^charmask)+magic)^(word^charmask) */
	orl $0xfefefeff, %edi	/* set all non-carry bits */
	incl %edi		/* add 1: if one carry bit was *not* set
				   the addition will not result in 0.  */
	jnz L32			/* C is detected in the word => examine it */

	movl 12(%esi), %edx	/* get word (= 4 bytes) in question */
	movl $0xfefefeff, %edi	/* magic value */
	addl %edx, %edi		/* add the magic value to the word.  We get
				   carry bits reported for each byte which
				   is *not* 0 */
	jnc L23			/* found NUL => check last word */
	xorl %edx, %edi		/* (word+magic)^word */
	orl $0xfefefeff, %edi	/* set all non-carry bits */
	incl %edi		/* add 1: if one carry bit was *not* set
				   the addition will not result in 0.  */
	jnz L23			/* found NUL => check last word */
	xorl %ecx, %edx		/* XOR with word c|c|c|c => bytes of str == c
				   are now 0 */
	movl $0xfefefeff, %edi	/* magic value */
	addl %edx, %edi		/* add the magic value to the word.  We get
				   carry bits reported for each byte which
				   is *not* 0 */
	jnc L43			/* highest byte is C => examine dword */
	xorl %edx, %edi		/* ((word^charmask)+magic)^(word^charmask) */
	orl $0xfefefeff, %edi	/* set all non-carry bits */
	incl %edi		/* add 1: if one carry bit was *not* set
				   the addition will not result in 0.  */
	jz L1			/* C is not detected => restart loop */
	jmp L33			/* examine word */

L23:	addl $4, %esi		/* adjust pointer */
L22:	addl $4, %esi
L21:	addl $4, %esi

	/* What remains to do is to test which byte the NUL char is and
	   whether the searched character appears in one of the bytes
	   before.  A special case is that the searched byte may be NUL.
	   In this case a pointer to the terminating NUL char has to be
	   returned.  Here %edx holds the unmodified word and %esi points
	   at it.  */

L20:	cmpb %cl, %dl		/* is first byte == C? */
	jne L24			/* no => skip */
	movl %esi, %eax		/* store address as result */
L24:	testb %dl, %dl		/* is first byte == NUL? */
	jz L2			/* yes => return */

	cmpb %cl, %dh		/* is second byte == C? */
	jne L25			/* no => skip */
	leal 1(%esi), %eax	/* store address as result */
L25:	testb %dh, %dh		/* is second byte == NUL? */
	jz L2			/* yes => return */

	shrl $16,%edx		/* make upper bytes accessible */
	cmpb %cl, %dl		/* is third byte == C */
	jne L26			/* no => skip */
	leal 2(%esi), %eax	/* store address as result */
L26:	testb %dl, %dl		/* is third byte == NUL */
	jz L2			/* yes => return */

	cmpb %cl, %dh		/* is fourth byte == C */
	jne L2			/* no => skip */
	leal 3(%esi), %eax	/* store address as result */

L2:	popl %esi		/* restore saved register content */
	popl %edi

	ret

weak_alias (strrchr, rindex)
|
176
sysdeps/i386/strspn.S
Normal file
176
sysdeps/i386/strspn.S
Normal file
@ -0,0 +1,176 @@
|
||||
/* strspn (str, ss) -- Return the length of the initial segment of STR
|
||||
which contains only characters from SS.
|
||||
For Intel 80x86, x>=3.
|
||||
Copyright (C) 1994, 1995 Free Software Foundation, Inc.
|
||||
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>
|
||||
Bug fixes by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public
|
||||
License along with the GNU C Library; see the file COPYING.LIB. If
|
||||
not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include "asm-syntax.h"
|
||||
|
||||
/*
|
||||
INPUT PARAMETERS:
|
||||
str (sp + 4)
|
||||
skipset (sp + 8)
|
||||
*/
|
||||
|
||||
.text
|
||||
ENTRY (strspn)
|
||||
movl 4(%esp), %edx /* get string pointer */
|
||||
movl 8(%esp), %eax /* get skipset pointer */
|
||||
|
||||
/* First we create a table with flags for all possible characters.
|
||||
For the ASCII (7bit/8bit) or ISO-8859-X character sets which are
|
||||
supported by the C string functions we have 256 characters.
|
||||
Before inserting marks for the stop characters we clear the whole
|
||||
table. The unrolled form is much faster than a loop. */
|
||||
xorl %ecx, %ecx /* %ecx = 0 !!! */
|
||||
|
||||
pushl %ecx /* make a 256 bytes long block filled with 0 */
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl %ecx
|
||||
pushl $0 /* These immediate values make the label 2 */
|
||||
pushl $0 /* to be aligned on a 16 byte boundary to */
|
||||
pushl $0 /* get a better performance of the loop. */
|
||||
pushl $0
|
||||
pushl $0
|
||||
pushl $0
|
||||
|
||||
/* For understanding the following code remember that %ecx == 0 now.
|
||||
Although all the following instruction only modify %cl we always
|
||||
have a correct zero-extended 32-bit value in %ecx. */
|
||||
|
||||
/* Don't change the "testb $0xff,%%cl" to "testb %%cl,%%cl". We want
|
||||
longer instructions so that the next loop aligns without adding nops. */
|
||||
|
||||
L2: movb (%eax), %cl /* get byte from stopset */
|
||||
testb %cl, %cl /* is NUL char? */
|
||||
jz L1 /* yes => start compare loop */
|
||||
movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */
|
||||
|
||||
movb 1(%eax), %cl /* get byte from stopset */
|
||||
testb $0xff, %cl /* is NUL char? */
|
||||
jz L1 /* yes => start compare loop */
|
||||
movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */
|
||||
|
||||
movb 2(%eax), %cl /* get byte from stopset */
|
||||
testb $0xff, %cl /* is NUL char? */
|
||||
jz L1 /* yes => start compare loop */
|
||||
movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */
|
||||
|
||||
movb 3(%eax), %cl /* get byte from stopset */
|
||||
addl $4, %eax /* increment stopset pointer */
|
||||
movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */
|
||||
testb $0xff, %cl /* is NUL char? */
|
||||
jnz L2 /* no => process next dword from stopset */
|
||||
|
||||
L1: leal -4(%edx), %eax /* prepare loop */
|
||||
|
||||
/* We use a neat trick for the following loop. Normally we would
|
||||
have to test for two termination conditions
|
||||
1. a character in the stopset was found
|
||||
and
|
||||
2. the end of the string was found
|
||||
But as a sign that the chracter is in the stopset we store its
|
||||
value in the table. But the value of NUL is NUL so the loop
|
||||
terminates for NUL in every case. */
|
||||
|
||||
L3: addl $4, %eax /* adjust pointer for full loop round */
|
||||
|
||||
movb (%eax), %cl /* get byte from string */
|
||||
testb %cl, (%esp,%ecx) /* is it contained in skipset? */
|
||||
jz L4 /* no => return */
|
||||
|
||||
movb 1(%eax), %cl /* get byte from string */
|
||||
testb %cl, (%esp,%ecx) /* is it contained in skipset? */
|
||||
jz L5 /* no => return */
|
||||
|
||||
movb 2(%eax), %cl /* get byte from string */
|
||||
testb %cl, (%esp,%ecx) /* is it contained in skipset? */
|
||||
jz L6 /* no => return */
|
||||
|
||||
movb 3(%eax), %cl /* get byte from string */
|
||||
testb %cl, (%esp,%ecx) /* is it contained in skipset? */
|
||||
jnz L3 /* yes => start loop again */
|
||||
|
||||
incl %eax /* adjust pointer */
|
||||
L6: incl %eax
|
||||
L5: incl %eax
|
||||
|
||||
L4: subl %edx, %eax /* we have to return the number of valid
|
||||
characters, so compute distance to first
|
||||
non-valid character */
|
||||
addl $256, %esp /* remove stopset */
|
||||
|
||||
ret
|
@ -1,7 +1,7 @@
|
||||
/* i80386 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
|
||||
store difference in a third limb vector.
|
||||
|
||||
Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||
Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU MP Library.
|
||||
|
||||
@ -37,10 +37,10 @@ C_SYMBOL_NAME(__mpn_sub_n:)
|
||||
pushl %edi
|
||||
pushl %esi
|
||||
|
||||
movl 12(%esp),%edi /* res_ptr */
|
||||
movl 16(%esp),%esi /* s1_ptr */
|
||||
movl 20(%esp),%edx /* s2_ptr */
|
||||
movl 24(%esp),%ecx /* size */
|
||||
movl 12(%esp),%edi /* res_ptr */
|
||||
movl 16(%esp),%esi /* s1_ptr */
|
||||
movl 20(%esp),%edx /* s2_ptr */
|
||||
movl 24(%esp),%ecx /* size */
|
||||
|
||||
movl %ecx,%eax
|
||||
shrl $3,%ecx /* compute count for unrolled loop */
|
||||
@ -54,14 +54,18 @@ C_SYMBOL_NAME(__mpn_sub_n:)
|
||||
subl %eax,%edx /* ... enter the loop */
|
||||
shrl $2,%eax /* restore previous value */
|
||||
#ifdef PIC
|
||||
call here
|
||||
here: leal (Loop - 3 - here)(%eax,%eax,8),%eax
|
||||
addl %eax,(%esp)
|
||||
ret
|
||||
/* Calculate start address in loop for PIC. Due to limitations in some
|
||||
assemblers, Loop-L0-3 cannot be put into the leal */
|
||||
call L0
|
||||
L0: leal (%eax,%eax,8),%eax
|
||||
addl (%esp),%eax
|
||||
addl $(Loop-L0-3),%eax
|
||||
addl $4,%esp
|
||||
#else
|
||||
leal (Loop - 3)(%eax,%eax,8),%eax /* calc start addr in loop */
|
||||
jmp *%eax /* jump into loop */
|
||||
/* Calculate start address in loop for non-PIC. */
|
||||
leal (Loop - 3)(%eax,%eax,8),%eax
|
||||
#endif
|
||||
jmp *%eax /* jump into loop */
|
||||
ALIGN (3)
|
||||
Loop: movl (%esi),%eax
|
||||
sbbl (%edx),%eax
|
||||
|
21
sysdeps/i960/add_n.s
Normal file
21
sysdeps/i960/add_n.s
Normal file
@ -0,0 +1,21 @@
|
||||
.text
|
||||
.align 4
|
||||
.globl ___mpn_add_n
|
||||
___mpn_add_n:
|
||||
mov 0,g6 # clear carry-save register
|
||||
cmpo 1,0 # clear cy
|
||||
|
||||
Loop: subo 1,g3,g3 # update loop counter
|
||||
ld (g1),g5 # load from s1_ptr
|
||||
addo 4,g1,g1 # s1_ptr++
|
||||
ld (g2),g4 # load from s2_ptr
|
||||
addo 4,g2,g2 # s2_ptr++
|
||||
cmpo g6,1 # restore cy from g6, relies on cy being 0
|
||||
addc g4,g5,g4 # main add
|
||||
subc 0,0,g6 # save cy in g6
|
||||
st g4,(g0) # store result to res_ptr
|
||||
addo 4,g0,g0 # res_ptr++
|
||||
cmpobne 0,g3,Loop # when branch is taken, clears C bit
|
||||
|
||||
mov g6,g0
|
||||
ret
|
26
sysdeps/i960/addmul_1.s
Normal file
26
sysdeps/i960/addmul_1.s
Normal file
@ -0,0 +1,26 @@
|
||||
.text
|
||||
.align 4
|
||||
.globl ___mpn_mul_1
|
||||
___mpn_mul_1:
|
||||
subo g2,0,g2
|
||||
shlo 2,g2,g4
|
||||
subo g4,g1,g1
|
||||
subo g4,g0,g13
|
||||
mov 0,g0
|
||||
|
||||
cmpo 1,0 # clear C bit on AC.cc
|
||||
|
||||
Loop: ld (g1)[g2*4],g5
|
||||
emul g3,g5,g6
|
||||
ld (g13)[g2*4],g5
|
||||
|
||||
addc g0,g6,g6 # relies on the C bit being clear
|
||||
addc 0,g7,g7
|
||||
addc g5,g6,g6 # relies on the C bit being clear
|
||||
st g6,(g13)[g2*4]
|
||||
addc 0,g7,g0
|
||||
|
||||
addo g2,1,g2
|
||||
cmpobne 0,g2,Loop # when branch is taken, clears C bit
|
||||
|
||||
ret
|
23
sysdeps/i960/mul_1.s
Normal file
23
sysdeps/i960/mul_1.s
Normal file
@ -0,0 +1,23 @@
|
||||
.text
|
||||
.align 4
|
||||
.globl ___mpn_mul_1
|
||||
___mpn_mul_1:
|
||||
subo g2,0,g2
|
||||
shlo 2,g2,g4
|
||||
subo g4,g1,g1
|
||||
subo g4,g0,g13
|
||||
mov 0,g0
|
||||
|
||||
cmpo 1,0 # clear C bit on AC.cc
|
||||
|
||||
Loop: ld (g1)[g2*4],g5
|
||||
emul g3,g5,g6
|
||||
|
||||
addc g0,g6,g6 # relies on the C bit being clear
|
||||
st g6,(g13)[g2*4]
|
||||
addc 0,g7,g0
|
||||
|
||||
addo g2,1,g2
|
||||
cmpobne 0,g2,Loop # when branch is taken, clears C bit
|
||||
|
||||
ret
|
21
sysdeps/i960/sub_n.s
Normal file
21
sysdeps/i960/sub_n.s
Normal file
@ -0,0 +1,21 @@
|
||||
.text
|
||||
.align 4
|
||||
.globl ___mpn_sub_n
|
||||
___mpn_sub_n:
|
||||
mov 1,g6 # set carry-save register
|
||||
cmpo 1,0 # clear cy
|
||||
|
||||
Loop: subo 1,g3,g3 # update loop counter
|
||||
ld (g1),g5 # load from s1_ptr
|
||||
addo 4,g1,g1 # s1_ptr++
|
||||
ld (g2),g4 # load from s2_ptr
|
||||
addo 4,g2,g2 # s2_ptr++
|
||||
cmpo g6,1 # restore cy from g6, relies on cy being 0
|
||||
subc g4,g5,g4 # main subtract
|
||||
subc 0,0,g6 # save cy in g6
|
||||
st g4,(g0) # store result to res_ptr
|
||||
addo 4,g0,g0 # res_ptr++
|
||||
cmpobne 0,g3,Loop # when branch is taken, cy will be 0
|
||||
|
||||
mov g6,g0
|
||||
ret
|
103
sysdeps/m88k/m88100/add_n.s
Normal file
103
sysdeps/m88k/m88100/add_n.s
Normal file
@ -0,0 +1,103 @@
|
||||
; mc88100 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
|
||||
; sum in a third limb vector.
|
||||
|
||||
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||
|
||||
; This file is part of the GNU MP Library.
|
||||
|
||||
; The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
; it under the terms of the GNU Library General Public License as published by
|
||||
; the Free Software Foundation; either version 2 of the License, or (at your
|
||||
; option) any later version.
|
||||
|
||||
; The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
; License for more details.
|
||||
|
||||
; You should have received a copy of the GNU Library General Public License
|
||||
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
; INPUT PARAMETERS
|
||||
; res_ptr r2
|
||||
; s1_ptr r3
|
||||
; s2_ptr r4
|
||||
; size r5
|
||||
|
||||
; This code has been optimized to run one instruction per clock, avoiding
|
||||
; load stalls and writeback contention. As a result, the instruction
|
||||
; order is not always natural.
|
||||
|
||||
; The speed is about 4.6 clocks/limb + 18 clocks/limb-vector on an 88100,
|
||||
; but on the 88110, it seems to run much slower, 6.6 clocks/limb.
|
||||
|
||||
text
|
||||
align 16
|
||||
global ___mpn_add_n
|
||||
___mpn_add_n:
|
||||
ld r6,r3,0 ; read first limb from s1_ptr
|
||||
extu r10,r5,3
|
||||
ld r7,r4,0 ; read first limb from s2_ptr
|
||||
|
||||
subu.co r5,r0,r5 ; (clear carry as side effect)
|
||||
mak r5,r5,3<4>
|
||||
bcnd eq0,r5,Lzero
|
||||
|
||||
or r12,r0,lo16(Lbase)
|
||||
or.u r12,r12,hi16(Lbase)
|
||||
addu r12,r12,r5 ; r12 is address for entering in loop
|
||||
|
||||
extu r5,r5,2 ; divide by 4
|
||||
subu r2,r2,r5 ; adjust res_ptr
|
||||
subu r3,r3,r5 ; adjust s1_ptr
|
||||
subu r4,r4,r5 ; adjust s2_ptr
|
||||
|
||||
or r8,r6,r0
|
||||
|
||||
jmp.n r12
|
||||
or r9,r7,r0
|
||||
|
||||
Loop: addu r3,r3,32
|
||||
st r8,r2,28
|
||||
addu r4,r4,32
|
||||
ld r6,r3,0
|
||||
addu r2,r2,32
|
||||
ld r7,r4,0
|
||||
Lzero: subu r10,r10,1 ; add 0 + 8r limbs (adj loop cnt)
|
||||
Lbase: ld r8,r3,4
|
||||
addu.cio r6,r6,r7
|
||||
ld r9,r4,4
|
||||
st r6,r2,0
|
||||
ld r6,r3,8 ; add 7 + 8r limbs
|
||||
addu.cio r8,r8,r9
|
||||
ld r7,r4,8
|
||||
st r8,r2,4
|
||||
ld r8,r3,12 ; add 6 + 8r limbs
|
||||
addu.cio r6,r6,r7
|
||||
ld r9,r4,12
|
||||
st r6,r2,8
|
||||
ld r6,r3,16 ; add 5 + 8r limbs
|
||||
addu.cio r8,r8,r9
|
||||
ld r7,r4,16
|
||||
st r8,r2,12
|
||||
ld r8,r3,20 ; add 4 + 8r limbs
|
||||
addu.cio r6,r6,r7
|
||||
ld r9,r4,20
|
||||
st r6,r2,16
|
||||
ld r6,r3,24 ; add 3 + 8r limbs
|
||||
addu.cio r8,r8,r9
|
||||
ld r7,r4,24
|
||||
st r8,r2,20
|
||||
ld r8,r3,28 ; add 2 + 8r limbs
|
||||
addu.cio r6,r6,r7
|
||||
ld r9,r4,28
|
||||
st r6,r2,24
|
||||
bcnd.n ne0,r10,Loop ; add 1 + 8r limbs
|
||||
addu.cio r8,r8,r9
|
||||
|
||||
st r8,r2,28 ; store most significant limb
|
||||
|
||||
jmp.n r1
|
||||
addu.ci r2,r0,r0 ; return carry-out from most sign. limb
|
128
sysdeps/m88k/m88100/mul_1.s
Normal file
128
sysdeps/m88k/m88100/mul_1.s
Normal file
@ -0,0 +1,128 @@
|
||||
; mc88100 __mpn_mul_1 -- Multiply a limb vector with a single limb and
|
||||
; store the product in a second limb vector.
|
||||
|
||||
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||
|
||||
; This file is part of the GNU MP Library.
|
||||
|
||||
; The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
; it under the terms of the GNU Library General Public License as published by
|
||||
; the Free Software Foundation; either version 2 of the License, or (at your
|
||||
; option) any later version.
|
||||
|
||||
; The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
; License for more details.
|
||||
|
||||
; You should have received a copy of the GNU Library General Public License
|
||||
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
; INPUT PARAMETERS
|
||||
; res_ptr r2
|
||||
; s1_ptr r3
|
||||
; size r4
|
||||
; s2_limb r5
|
||||
|
||||
; Common overhead is about 11 cycles/invocation.
|
||||
|
||||
; The speed for S2_LIMB >= 0x10000 is approximately 21 cycles/limb. (The
|
||||
; pipeline stalls 2 cycles due to WB contention.)
|
||||
|
||||
; The speed for S2_LIMB < 0x10000 is approximately 16 cycles/limb. (The
|
||||
; pipeline stalls 2 cycles due to WB contention and 1 cycle due to latency.)
|
||||
|
||||
; To enhance speed:
|
||||
; 1. Unroll main loop 4-8 times.
|
||||
; 2. Schedule code to avoid WB contention. It might be tempting to move the
|
||||
; ld instruction in the loops down to save 2 cycles (less WB contention),
|
||||
; but that loses because the ultimate value will be read from outside
|
||||
; the allocated space. But if we handle the ultimate multiplication in
|
||||
; the tail, we can do this.
|
||||
; 3. Make the multiplication with fewer instructions. I think the code for
|
||||
; (S2_LIMB >= 0x10000) is not minimal.
|
||||
; With these techniques the (S2_LIMB >= 0x10000) case would run in 17 or
|
||||
; fewer cycles/limb; the (S2_LIMB < 0x10000) case would run in 11
|
||||
; cycles/limb. (Assuming infinite unrolling.)
|
||||
|
||||
text
|
||||
align 16
|
||||
global ___mpn_mul_1
|
||||
___mpn_mul_1:
|
||||
|
||||
; Make S1_PTR and RES_PTR point at the end of their blocks
|
||||
; and negate SIZE.
|
||||
lda r3,r3[r4]
|
||||
lda r6,r2[r4] ; RES_PTR in r6 since r2 is retval
|
||||
subu r4,r0,r4
|
||||
|
||||
addu.co r2,r0,r0 ; r2 = cy = 0
|
||||
ld r9,r3[r4]
|
||||
mask r7,r5,0xffff ; r7 = lo(S2_LIMB)
|
||||
extu r8,r5,16 ; r8 = hi(S2_LIMB)
|
||||
bcnd.n eq0,r8,Lsmall ; jump if (hi(S2_LIMB) == 0)
|
||||
subu r6,r6,4
|
||||
|
||||
; General code for any value of S2_LIMB.
|
||||
|
||||
; Make a stack frame and save r25 and r26
|
||||
subu r31,r31,16
|
||||
st.d r25,r31,8
|
||||
|
||||
; Enter the loop in the middle
|
||||
br.n L1
|
||||
addu r4,r4,1
|
||||
|
||||
Loop:
|
||||
ld r9,r3[r4]
|
||||
st r26,r6[r4]
|
||||
; bcnd ne0,r0,0 ; bubble
|
||||
addu r4,r4,1
|
||||
L1: mul r26,r9,r5 ; low word of product mul_1 WB ld
|
||||
mask r12,r9,0xffff ; r12 = lo(s1_limb) mask_1
|
||||
mul r11,r12,r7 ; r11 = prod_0 mul_2 WB mask_1
|
||||
mul r10,r12,r8 ; r10 = prod_1a mul_3
|
||||
extu r13,r9,16 ; r13 = hi(s1_limb) extu_1 WB mul_1
|
||||
mul r12,r13,r7 ; r12 = prod_1b mul_4 WB extu_1
|
||||
mul r25,r13,r8 ; r25 = prod_2 mul_5 WB mul_2
|
||||
extu r11,r11,16 ; r11 = hi(prod_0) extu_2 WB mul_3
|
||||
addu r10,r10,r11 ; addu_1 WB extu_2
|
||||
; bcnd ne0,r0,0 ; bubble WB addu_1
|
||||
addu.co r10,r10,r12 ; WB mul_4
|
||||
mask.u r10,r10,0xffff ; move the 16 most significant bits...
|
||||
addu.ci r10,r10,r0 ; ...to the low half of the word...
|
||||
rot r10,r10,16 ; ...and put carry in pos 16.
|
||||
addu.co r26,r26,r2 ; add old carry limb
|
||||
bcnd.n ne0,r4,Loop
|
||||
addu.ci r2,r25,r10 ; compute new carry limb
|
||||
|
||||
st r26,r6[r4]
|
||||
ld.d r25,r31,8
|
||||
jmp.n r1
|
||||
addu r31,r31,16
|
||||
|
||||
; Fast code for S2_LIMB < 0x10000
|
||||
Lsmall:
|
||||
; Enter the loop in the middle
|
||||
br.n SL1
|
||||
addu r4,r4,1
|
||||
|
||||
SLoop:
|
||||
ld r9,r3[r4] ;
|
||||
st r8,r6[r4] ;
|
||||
addu r4,r4,1 ;
|
||||
SL1: mul r8,r9,r5 ; low word of product
|
||||
mask r12,r9,0xffff ; r12 = lo(s1_limb)
|
||||
extu r13,r9,16 ; r13 = hi(s1_limb)
|
||||
mul r11,r12,r7 ; r11 = prod_0
|
||||
mul r12,r13,r7 ; r12 = prod_1b
|
||||
addu.cio r8,r8,r2 ; add old carry limb
|
||||
extu r10,r11,16 ; r10 = hi(prod_0)
|
||||
addu r10,r10,r12 ;
|
||||
bcnd.n ne0,r4,SLoop
|
||||
extu r2,r10,16 ; r2 = new carry limb
|
||||
|
||||
jmp.n r1
|
||||
st r8,r6[r4]
|
104
sysdeps/m88k/m88100/sub_n.s
Normal file
104
sysdeps/m88k/m88100/sub_n.s
Normal file
@ -0,0 +1,104 @@
|
||||
; mc88100 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
|
||||
; store difference in a third limb vector.
|
||||
|
||||
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||
|
||||
; This file is part of the GNU MP Library.
|
||||
|
||||
; The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
; it under the terms of the GNU Library General Public License as published by
|
||||
; the Free Software Foundation; either version 2 of the License, or (at your
|
||||
; option) any later version.
|
||||
|
||||
; The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
; License for more details.
|
||||
|
||||
; You should have received a copy of the GNU Library General Public License
|
||||
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
; INPUT PARAMETERS
|
||||
; res_ptr r2
|
||||
; s1_ptr r3
|
||||
; s2_ptr r4
|
||||
; size r5
|
||||
|
||||
; This code has been optimized to run one instruction per clock, avoiding
|
||||
; load stalls and writeback contention. As a result, the instruction
|
||||
; order is not always natural.
|
||||
|
||||
; The speed is about 4.6 clocks/limb + 18 clocks/limb-vector on an 88100,
|
||||
; but on the 88110, it seems to run much slower, 6.6 clocks/limb.
|
||||
|
||||
text
|
||||
align 16
|
||||
global ___mpn_sub_n
|
||||
___mpn_sub_n:
|
||||
ld r6,r3,0 ; read first limb from s1_ptr
|
||||
extu r10,r5,3
|
||||
ld r7,r4,0 ; read first limb from s2_ptr
|
||||
|
||||
subu.co r5,r0,r5 ; (clear carry as side effect)
|
||||
mak r5,r5,3<4>
|
||||
bcnd eq0,r5,Lzero
|
||||
|
||||
or r12,r0,lo16(Lbase)
|
||||
or.u r12,r12,hi16(Lbase)
|
||||
addu r12,r12,r5 ; r12 is address for entering in loop
|
||||
|
||||
extu r5,r5,2 ; divide by 4
|
||||
subu r2,r2,r5 ; adjust res_ptr
|
||||
subu r3,r3,r5 ; adjust s1_ptr
|
||||
subu r4,r4,r5 ; adjust s2_ptr
|
||||
|
||||
or r8,r6,r0
|
||||
|
||||
jmp.n r12
|
||||
or r9,r7,r0
|
||||
|
||||
Loop: addu r3,r3,32
|
||||
st r8,r2,28
|
||||
addu r4,r4,32
|
||||
ld r6,r3,0
|
||||
addu r2,r2,32
|
||||
ld r7,r4,0
|
||||
Lzero: subu r10,r10,1 ; subtract 0 + 8r limbs (adj loop cnt)
|
||||
Lbase: ld r8,r3,4
|
||||
subu.cio r6,r6,r7
|
||||
ld r9,r4,4
|
||||
st r6,r2,0
|
||||
ld r6,r3,8 ; subtract 7 + 8r limbs
|
||||
subu.cio r8,r8,r9
|
||||
ld r7,r4,8
|
||||
st r8,r2,4
|
||||
ld r8,r3,12 ; subtract 6 + 8r limbs
|
||||
subu.cio r6,r6,r7
|
||||
ld r9,r4,12
|
||||
st r6,r2,8
|
||||
ld r6,r3,16 ; subtract 5 + 8r limbs
|
||||
subu.cio r8,r8,r9
|
||||
ld r7,r4,16
|
||||
st r8,r2,12
|
||||
ld r8,r3,20 ; subtract 4 + 8r limbs
|
||||
subu.cio r6,r6,r7
|
||||
ld r9,r4,20
|
||||
st r6,r2,16
|
||||
ld r6,r3,24 ; subtract 3 + 8r limbs
|
||||
subu.cio r8,r8,r9
|
||||
ld r7,r4,24
|
||||
st r8,r2,20
|
||||
ld r8,r3,28 ; subtract 2 + 8r limbs
|
||||
subu.cio r6,r6,r7
|
||||
ld r9,r4,28
|
||||
st r6,r2,24
|
||||
bcnd.n ne0,r10,Loop ; subtract 1 + 8r limbs
|
||||
subu.cio r8,r8,r9
|
||||
|
||||
st r8,r2,28 ; store most significant limb
|
||||
|
||||
addu.ci r2,r0,r0 ; return carry-out from most sign. limb
|
||||
jmp.n r1
|
||||
xor r2,r2,1
|
84
sysdeps/m88k/m88110/mul_1.s
Normal file
84
sysdeps/m88k/m88110/mul_1.s
Normal file
@ -0,0 +1,84 @@
|
||||
; mc88110 __mpn_mul_1 -- Multiply a limb vector with a single limb and
|
||||
; store the product in a second limb vector.
|
||||
|
||||
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||
|
||||
; This file is part of the GNU MP Library.
|
||||
|
||||
; The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
; it under the terms of the GNU Library General Public License as published by
|
||||
; the Free Software Foundation; either version 2 of the License, or (at your
|
||||
; option) any later version.
|
||||
|
||||
; The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
; License for more details.
|
||||
|
||||
; You should have received a copy of the GNU Library General Public License
|
||||
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
; INPUT PARAMETERS
|
||||
; res_ptr r2
|
||||
; s1_ptr r3
|
||||
; size r4
|
||||
; s2_limb r5
|
||||
|
||||
text
|
||||
align 16
|
||||
global ___mpn_mul_1
|
||||
___mpn_mul_1:
|
||||
; Make S1_PTR and RES_PTR point at the end of their blocks
|
||||
; and negate SIZE.
|
||||
lda r3,r3[r4]
|
||||
lda r8,r2[r4] ; RES_PTR in r8 since r2 is retval
|
||||
subu r4,r0,r4
|
||||
|
||||
addu.co r2,r0,r0 ; r2 = cy = 0
|
||||
|
||||
ld r6,r3[r4]
|
||||
addu r4,r4,1
|
||||
mulu.d r10,r6,r5
|
||||
bcnd.n eq0,r4,Lend
|
||||
subu r8,r8,8
|
||||
|
||||
Loop: ld r6,r3[r4]
|
||||
addu.cio r9,r11,r2
|
||||
or r2,r10,r0 ; could be avoided if unrolled
|
||||
addu r4,r4,1
|
||||
mulu.d r10,r6,r5
|
||||
bcnd.n ne0,r4,Loop
|
||||
st r9,r8[r4]
|
||||
|
||||
Lend: addu.cio r9,r11,r2
|
||||
st r9,r8,4
|
||||
jmp.n r1
|
||||
addu.ci r2,r10,r0
|
||||
|
||||
; This is the Right Way to do this on '110. 4 cycles / 64-bit limb.
|
||||
; ld.d r10,
|
||||
; mulu.d
|
||||
; addu.cio
|
||||
; addu.cio
|
||||
; st.d
|
||||
; mulu.d ,r11,r5
|
||||
; ld.d r12,
|
||||
; mulu.d ,r10,r5
|
||||
; addu.cio
|
||||
; addu.cio
|
||||
; st.d
|
||||
; mulu.d
|
||||
; ld.d r10,
|
||||
; mulu.d
|
||||
; addu.cio
|
||||
; addu.cio
|
||||
; st.d
|
||||
; mulu.d
|
||||
; ld.d r10,
|
||||
; mulu.d
|
||||
; addu.cio
|
||||
; addu.cio
|
||||
; st.d
|
||||
; mulu.d
|
119
sysdeps/mips/add_n.s
Normal file
119
sysdeps/mips/add_n.s
Normal file
@ -0,0 +1,119 @@
|
||||
# MIPS2 __mpn_add_n -- Add two limb vectors of the same length > 0 and
|
||||
# store sum in a third limb vector.
|
||||
|
||||
# Copyright (C) 1995 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Library General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr $4
|
||||
# s1_ptr $5
|
||||
# s2_ptr $6
|
||||
# size $7
|
||||
|
||||
.text
|
||||
.align 2
|
||||
.globl __mpn_add_n
|
||||
.ent __mpn_add_n
|
||||
__mpn_add_n:
|
||||
.set noreorder
|
||||
.set nomacro
|
||||
|
||||
lw $10,0($5)
|
||||
lw $11,0($6)
|
||||
|
||||
addiu $7,$7,-1
|
||||
and $9,$7,4-1 # number of limbs in first loop
|
||||
beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop
|
||||
move $2,$0
|
||||
|
||||
subu $7,$7,$9
|
||||
|
||||
.Loop0: addiu $9,$9,-1
|
||||
lw $12,4($5)
|
||||
addu $11,$11,$2
|
||||
lw $13,4($6)
|
||||
sltu $8,$11,$2
|
||||
addu $11,$10,$11
|
||||
sltu $2,$11,$10
|
||||
sw $11,0($4)
|
||||
or $2,$2,$8
|
||||
|
||||
addiu $5,$5,4
|
||||
addiu $6,$6,4
|
||||
move $10,$12
|
||||
move $11,$13
|
||||
bne $9,$0,.Loop0
|
||||
addiu $4,$4,4
|
||||
|
||||
.L0: beq $7,$0,.Lend
|
||||
nop
|
||||
|
||||
.Loop: addiu $7,$7,-4
|
||||
|
||||
lw $12,4($5)
|
||||
addu $11,$11,$2
|
||||
lw $13,4($6)
|
||||
sltu $8,$11,$2
|
||||
addu $11,$10,$11
|
||||
sltu $2,$11,$10
|
||||
sw $11,0($4)
|
||||
or $2,$2,$8
|
||||
|
||||
lw $10,8($5)
|
||||
addu $13,$13,$2
|
||||
lw $11,8($6)
|
||||
sltu $8,$13,$2
|
||||
addu $13,$12,$13
|
||||
sltu $2,$13,$12
|
||||
sw $13,4($4)
|
||||
or $2,$2,$8
|
||||
|
||||
lw $12,12($5)
|
||||
addu $11,$11,$2
|
||||
lw $13,12($6)
|
||||
sltu $8,$11,$2
|
||||
addu $11,$10,$11
|
||||
sltu $2,$11,$10
|
||||
sw $11,8($4)
|
||||
or $2,$2,$8
|
||||
|
||||
lw $10,16($5)
|
||||
addu $13,$13,$2
|
||||
lw $11,16($6)
|
||||
sltu $8,$13,$2
|
||||
addu $13,$12,$13
|
||||
sltu $2,$13,$12
|
||||
sw $13,12($4)
|
||||
or $2,$2,$8
|
||||
|
||||
addiu $5,$5,16
|
||||
addiu $6,$6,16
|
||||
|
||||
bne $7,$0,.Loop
|
||||
addiu $4,$4,16
|
||||
|
||||
.Lend: addu $11,$11,$2
|
||||
sltu $8,$11,$2
|
||||
addu $11,$10,$11
|
||||
sltu $2,$11,$10
|
||||
sw $11,0($4)
|
||||
j $31
|
||||
or $2,$2,$8
|
||||
|
||||
.end __mpn_add_n
|
96
sysdeps/mips/addmul_1.s
Normal file
96
sysdeps/mips/addmul_1.s
Normal file
@ -0,0 +1,96 @@
|
||||
# MIPS __mpn_addmul_1 -- Multiply a limb vector with a single limb and
|
||||
# add the product to a second limb vector.
|
||||
|
||||
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Library General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr $4
|
||||
# s1_ptr $5
|
||||
# size $6
|
||||
# s2_limb $7
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.globl __mpn_addmul_1
|
||||
.ent __mpn_addmul_1
|
||||
__mpn_addmul_1:
|
||||
.set noreorder
|
||||
.set nomacro
|
||||
|
||||
# warm up phase 0
|
||||
lw $8,0($5)
|
||||
|
||||
# warm up phase 1
|
||||
addiu $5,$5,4
|
||||
multu $8,$7
|
||||
|
||||
addiu $6,$6,-1
|
||||
beq $6,$0,$LC0
|
||||
move $2,$0 # zero cy2
|
||||
|
||||
addiu $6,$6,-1
|
||||
beq $6,$0,$LC1
|
||||
lw $8,0($5) # load new s1 limb as early as possible
|
||||
|
||||
Loop: lw $10,0($4)
|
||||
mflo $3
|
||||
mfhi $9
|
||||
addiu $5,$5,4
|
||||
addu $3,$3,$2 # add old carry limb to low product limb
|
||||
multu $8,$7
|
||||
lw $8,0($5) # load new s1 limb as early as possible
|
||||
addiu $6,$6,-1 # decrement loop counter
|
||||
sltu $2,$3,$2 # carry from previous addition -> $2
|
||||
addu $3,$10,$3
|
||||
sltu $10,$3,$10
|
||||
addu $2,$2,$10
|
||||
sw $3,0($4)
|
||||
addiu $4,$4,4
|
||||
bne $6,$0,Loop # should be "bnel"
|
||||
addu $2,$9,$2 # add high product limb and carry from addition
|
||||
|
||||
# cool down phase 1
|
||||
$LC1: lw $10,0($4)
|
||||
mflo $3
|
||||
mfhi $9
|
||||
addu $3,$3,$2
|
||||
sltu $2,$3,$2
|
||||
multu $8,$7
|
||||
addu $3,$10,$3
|
||||
sltu $10,$3,$10
|
||||
addu $2,$2,$10
|
||||
sw $3,0($4)
|
||||
addiu $4,$4,4
|
||||
addu $2,$9,$2 # add high product limb and carry from addition
|
||||
|
||||
# cool down phase 0
|
||||
$LC0: lw $10,0($4)
|
||||
mflo $3
|
||||
mfhi $9
|
||||
addu $3,$3,$2
|
||||
sltu $2,$3,$2
|
||||
addu $3,$10,$3
|
||||
sltu $10,$3,$10
|
||||
addu $2,$2,$10
|
||||
sw $3,0($4)
|
||||
j $31
|
||||
addu $2,$9,$2 # add high product limb and carry from addition
|
||||
|
||||
.end __mpn_addmul_1
|
94
sysdeps/mips/lshift.s
Normal file
94
sysdeps/mips/lshift.s
Normal file
@ -0,0 +1,94 @@
|
||||
# MIPS2 __mpn_lshift -- Shift a limb vector left by cnt bits and store the result in a second limb vector.
|
||||
|
||||
# Copyright (C) 1995 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Library General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr $4
|
||||
# src_ptr $5
|
||||
# size $6
|
||||
# cnt $7
|
||||
|
||||
.text
|
||||
.align 2
|
||||
.globl __mpn_lshift
|
||||
.ent __mpn_lshift
|
||||
__mpn_lshift:
|
||||
.set noreorder
|
||||
.set nomacro
|
||||
|
||||
sll $2,$6,2
|
||||
addu $5,$5,$2 # make r5 point at end of src
|
||||
lw $10,-4($5) # load first limb
|
||||
subu $13,$0,$7
|
||||
addu $4,$4,$2 # make r4 point at end of res
|
||||
addiu $6,$6,-1
|
||||
and $9,$6,4-1 # number of limbs in first loop
|
||||
beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop
|
||||
srl $2,$10,$13 # compute function result
|
||||
|
||||
subu $6,$6,$9
|
||||
|
||||
.Loop0: lw $3,-8($5)
|
||||
addiu $4,$4,-4
|
||||
addiu $5,$5,-4
|
||||
addiu $9,$9,-1
|
||||
sll $11,$10,$7
|
||||
srl $12,$3,$13
|
||||
move $10,$3
|
||||
or $8,$11,$12
|
||||
bne $9,$0,.Loop0
|
||||
sw $8,0($4)
|
||||
|
||||
.L0: beq $6,$0,.Lend
|
||||
nop
|
||||
|
||||
.Loop: lw $3,-8($5)
|
||||
addiu $4,$4,-16
|
||||
addiu $6,$6,-4
|
||||
sll $11,$10,$7
|
||||
srl $12,$3,$13
|
||||
|
||||
lw $10,-12($5)
|
||||
sll $14,$3,$7
|
||||
or $8,$11,$12
|
||||
sw $8,12($4)
|
||||
srl $9,$10,$13
|
||||
|
||||
lw $3,-16($5)
|
||||
sll $11,$10,$7
|
||||
or $8,$14,$9
|
||||
sw $8,8($4)
|
||||
srl $12,$3,$13
|
||||
|
||||
lw $10,-20($5)
|
||||
sll $14,$3,$7
|
||||
or $8,$11,$12
|
||||
sw $8,4($4)
|
||||
srl $9,$10,$13
|
||||
|
||||
addiu $5,$5,-16
|
||||
or $8,$14,$9
|
||||
bgtz $6,.Loop
|
||||
sw $8,0($4)
|
||||
|
||||
.Lend: sll $8,$10,$7
|
||||
j $31
|
||||
sw $8,-4($4)
|
||||
.end __mpn_lshift
|
119
sysdeps/mips/mips3/add_n.s
Normal file
119
sysdeps/mips/mips3/add_n.s
Normal file
@ -0,0 +1,119 @@
|
||||
# MIPS3 __mpn_add_n -- Add two limb vectors of the same length > 0 and
|
||||
# store sum in a third limb vector.
|
||||
|
||||
# Copyright (C) 1995 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Library General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr $4
|
||||
# s1_ptr $5
|
||||
# s2_ptr $6
|
||||
# size $7
|
||||
|
||||
.text
|
||||
.align 2
|
||||
.globl __mpn_add_n
|
||||
.ent __mpn_add_n
|
||||
__mpn_add_n:
|
||||
.set noreorder
|
||||
.set nomacro
|
||||
|
||||
ld $10,0($5)
|
||||
ld $11,0($6)
|
||||
|
||||
daddiu $7,$7,-1
|
||||
and $9,$7,4-1 # number of limbs in first loop
|
||||
beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop
|
||||
move $2,$0
|
||||
|
||||
dsubu $7,$7,$9
|
||||
|
||||
.Loop0: daddiu $9,$9,-1
|
||||
ld $12,8($5)
|
||||
daddu $11,$11,$2
|
||||
ld $13,8($6)
|
||||
sltu $8,$11,$2
|
||||
daddu $11,$10,$11
|
||||
sltu $2,$11,$10
|
||||
sd $11,0($4)
|
||||
or $2,$2,$8
|
||||
|
||||
daddiu $5,$5,8
|
||||
daddiu $6,$6,8
|
||||
move $10,$12
|
||||
move $11,$13
|
||||
bne $9,$0,.Loop0
|
||||
daddiu $4,$4,8
|
||||
|
||||
.L0: beq $7,$0,.Lend
|
||||
nop
|
||||
|
||||
.Loop: daddiu $7,$7,-4
|
||||
|
||||
ld $12,8($5)
|
||||
daddu $11,$11,$2
|
||||
ld $13,8($6)
|
||||
sltu $8,$11,$2
|
||||
daddu $11,$10,$11
|
||||
sltu $2,$11,$10
|
||||
sd $11,0($4)
|
||||
or $2,$2,$8
|
||||
|
||||
ld $10,16($5)
|
||||
daddu $13,$13,$2
|
||||
ld $11,16($6)
|
||||
sltu $8,$13,$2
|
||||
daddu $13,$12,$13
|
||||
sltu $2,$13,$12
|
||||
sd $13,8($4)
|
||||
or $2,$2,$8
|
||||
|
||||
ld $12,24($5)
|
||||
daddu $11,$11,$2
|
||||
ld $13,24($6)
|
||||
sltu $8,$11,$2
|
||||
daddu $11,$10,$11
|
||||
sltu $2,$11,$10
|
||||
sd $11,16($4)
|
||||
or $2,$2,$8
|
||||
|
||||
ld $10,32($5)
|
||||
daddu $13,$13,$2
|
||||
ld $11,32($6)
|
||||
sltu $8,$13,$2
|
||||
daddu $13,$12,$13
|
||||
sltu $2,$13,$12
|
||||
sd $13,24($4)
|
||||
or $2,$2,$8
|
||||
|
||||
daddiu $5,$5,32
|
||||
daddiu $6,$6,32
|
||||
|
||||
bne $7,$0,.Loop
|
||||
daddiu $4,$4,32
|
||||
|
||||
.Lend: daddu $11,$11,$2
|
||||
sltu $8,$11,$2
|
||||
daddu $11,$10,$11
|
||||
sltu $2,$11,$10
|
||||
sd $11,0($4)
|
||||
j $31
|
||||
or $2,$2,$8
|
||||
|
||||
.end __mpn_add_n
|
96
sysdeps/mips/mips3/addmul_1.s
Normal file
96
sysdeps/mips/mips3/addmul_1.s
Normal file
@ -0,0 +1,96 @@
|
||||
# MIPS3 __mpn_addmul_1 -- Multiply a limb vector with a single limb and
|
||||
# add the product to a second limb vector.
|
||||
|
||||
# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Library General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr $4
|
||||
# s1_ptr $5
|
||||
# size $6
|
||||
# s2_limb $7
|
||||
|
||||
# __mpn_addmul_1 (MIPS3): multiply the size-limb vector at s1_ptr ($5) by
# s2_limb ($7), add the products to the limbs at res_ptr ($4) in place, and
# leave the final carry limb in $2.  The multiply for the next limb is started
# while the previous product is still being added (software pipelining).
.text
|
||||
.align 4
|
||||
.globl __mpn_addmul_1
|
||||
.ent __mpn_addmul_1
|
||||
__mpn_addmul_1:
|
||||
.set noreorder
|
||||
.set nomacro
|
||||
|
||||
# warm up phase 0
|
||||
ld $8,0($5) # first s1 limb
|
||||
|
||||
# warm up phase 1
|
||||
daddiu $5,$5,8
|
||||
dmultu $8,$7 # start first product; result arrives in HI/LO
|
||||
|
||||
daddiu $6,$6,-1
|
||||
beq $6,$0,$LC0 # size == 1: only the final phase is needed
|
||||
move $2,$0 # zero cy2
|
||||
|
||||
daddiu $6,$6,-1
|
||||
beq $6,$0,$LC1 # size == 2: skip the loop
|
||||
ld $8,0($5) # load new s1 limb as early as possible
|
||||
|
||||
Loop: ld $10,0($4) # current res limb
|
||||
mflo $3 # low product limb
|
||||
mfhi $9 # high product limb
|
||||
daddiu $5,$5,8
|
||||
daddu $3,$3,$2 # add old carry limb to low product limb
|
||||
dmultu $8,$7 # start the next product
|
||||
ld $8,0($5) # load new s1 limb as early as possible
|
||||
daddiu $6,$6,-1 # decrement loop counter
|
||||
sltu $2,$3,$2 # carry from previous addition -> $2
|
||||
daddu $3,$10,$3 # add the res limb
|
||||
sltu $10,$3,$10 # carry from adding the res limb
|
||||
daddu $2,$2,$10 # sum of both carries
|
||||
sd $3,0($4) # store updated res limb
|
||||
daddiu $4,$4,8
|
||||
bne $6,$0,Loop # should be "bnel"
|
||||
daddu $2,$9,$2 # add high product limb and carry from addition
|
||||
|
||||
# cool down phase 1
|
||||
$LC1: ld $10,0($4)
|
||||
mflo $3
|
||||
mfhi $9
|
||||
daddu $3,$3,$2
|
||||
sltu $2,$3,$2
|
||||
dmultu $8,$7 # start the last product
|
||||
daddu $3,$10,$3
|
||||
sltu $10,$3,$10
|
||||
daddu $2,$2,$10
|
||||
sd $3,0($4)
|
||||
daddiu $4,$4,8
|
||||
daddu $2,$9,$2 # add high product limb and carry from addition
|
||||
|
||||
# cool down phase 0
|
||||
$LC0: ld $10,0($4)
|
||||
mflo $3
|
||||
mfhi $9
|
||||
daddu $3,$3,$2
|
||||
sltu $2,$3,$2
|
||||
daddu $3,$10,$3
|
||||
sltu $10,$3,$10
|
||||
daddu $2,$2,$10
|
||||
sd $3,0($4)
|
||||
j $31
|
||||
daddu $2,$9,$2 # add high product limb and carry from addition
|
||||
|
||||
.end __mpn_addmul_1
|
26
sysdeps/mips/mips3/gmp-mparam.h
Normal file
26
sysdeps/mips/mips3/gmp-mparam.h
Normal file
@ -0,0 +1,26 @@
|
||||
/* gmp-mparam.h -- Compiler/machine parameter header file.
|
||||
|
||||
Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU MP Library.
|
||||
|
||||
The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Library General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public License
|
||||
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||
|
||||
/* Widths, in bits unless named otherwise, of a limb and of the basic C
   integer types for this configuration.  */
#define BITS_PER_MP_LIMB 64
|
||||
#define BYTES_PER_MP_LIMB 8
|
||||
/* NOTE(review): long is given as 32 bits although a limb is 64 bits --
   presumably an ABI with 32-bit long is targeted; confirm.  */
#define BITS_PER_LONGINT 32
|
||||
#define BITS_PER_INT 32
|
||||
#define BITS_PER_SHORTINT 16
|
||||
#define BITS_PER_CHAR 8
|
94
sysdeps/mips/mips3/lshift.s
Normal file
94
sysdeps/mips/mips3/lshift.s
Normal file
@ -0,0 +1,94 @@
|
||||
# MIPS3 __mpn_lshift -- Shift a limb vector left by cnt bits and store the
# result in a second limb vector.
|
||||
|
||||
# Copyright (C) 1995 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Library General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr $4
|
||||
# src_ptr $5
|
||||
# size $6
|
||||
# cnt $7
|
||||
|
||||
# __mpn_lshift (MIPS3): shift the size-limb vector at src_ptr ($5) left by
# cnt ($7) bits, storing the 64-bit limbs at res_ptr ($4).  Limbs are processed
# from the most significant end downwards; the bits shifted out of the top
# limb are left in $2.  Branch delay slots are explicit (.set noreorder).
.text
|
||||
.align 2
|
||||
.globl __mpn_lshift
|
||||
.ent __mpn_lshift
|
||||
__mpn_lshift:
|
||||
.set noreorder
|
||||
.set nomacro
|
||||
|
||||
dsll $2,$6,3 # size in bytes
|
||||
daddu $5,$5,$2 # make r5 point at end of src
|
||||
ld $10,-8($5) # load first limb
|
||||
dsubu $13,$0,$7 # $13 = -cnt, used as the complementary shift count
|
||||
daddu $4,$4,$2 # make r4 point at end of res
|
||||
daddiu $6,$6,-1 # size-1: the last limb is handled at .Lend
|
||||
and $9,$6,4-1 # number of limbs in first loop
|
||||
beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop
|
||||
dsrl $2,$10,$13 # compute function result
|
||||
|
||||
dsubu $6,$6,$9 # what remains for .Loop is a multiple of 4
|
||||
|
||||
.Loop0: ld $3,-16($5) # next lower limb
|
||||
daddiu $4,$4,-8
|
||||
daddiu $5,$5,-8
|
||||
daddiu $9,$9,-1
|
||||
dsll $11,$10,$7 # current limb shifted left
|
||||
dsrl $12,$3,$13 # bits moving up from the next lower limb
|
||||
move $10,$3
|
||||
or $8,$11,$12 # combine into one result limb
|
||||
bne $9,$0,.Loop0
|
||||
sd $8,0($4) # (delay slot) store result limb
|
||||
|
||||
.L0: beq $6,$0,.Lend # no groups of four left
|
||||
nop
|
||||
|
||||
.Loop: ld $3,-16($5)
|
||||
daddiu $4,$4,-32
|
||||
daddiu $6,$6,-4 # four limbs per iteration
|
||||
dsll $11,$10,$7
|
||||
dsrl $12,$3,$13
|
||||
|
||||
ld $10,-24($5)
|
||||
dsll $14,$3,$7
|
||||
or $8,$11,$12
|
||||
sd $8,24($4)
|
||||
dsrl $9,$10,$13
|
||||
|
||||
ld $3,-32($5)
|
||||
dsll $11,$10,$7
|
||||
or $8,$14,$9
|
||||
sd $8,16($4)
|
||||
dsrl $12,$3,$13
|
||||
|
||||
ld $10,-40($5)
|
||||
dsll $14,$3,$7
|
||||
or $8,$11,$12
|
||||
sd $8,8($4)
|
||||
dsrl $9,$10,$13
|
||||
|
||||
daddiu $5,$5,-32
|
||||
or $8,$14,$9
|
||||
bgtz $6,.Loop
|
||||
sd $8,0($4) # (delay slot) store fourth result limb
|
||||
|
||||
.Lend: dsll $8,$10,$7 # lowest limb: nothing below it to merge in
|
||||
j $31
|
||||
sd $8,-8($4) # (delay slot) store lowest result limb
|
||||
.end __mpn_lshift
|
84
sysdeps/mips/mips3/mul_1.s
Normal file
84
sysdeps/mips/mips3/mul_1.s
Normal file
@ -0,0 +1,84 @@
|
||||
# MIPS3 __mpn_mul_1 -- Multiply a limb vector with a single limb and
|
||||
# store the product in a second limb vector.
|
||||
|
||||
# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Library General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr $4
|
||||
# s1_ptr $5
|
||||
# size $6
|
||||
# s2_limb $7
|
||||
|
||||
# __mpn_mul_1 (MIPS3): multiply the size-limb vector at s1_ptr ($5) by
# s2_limb ($7), store the low 64-bit product limbs (plus running carry) at
# res_ptr ($4), and leave the final carry limb in $2.  The multiply for the
# next limb is started while the previous product is still being stored.
.text
|
||||
.align 4
|
||||
.globl __mpn_mul_1
|
||||
.ent __mpn_mul_1
|
||||
__mpn_mul_1:
|
||||
.set noreorder
|
||||
.set nomacro
|
||||
|
||||
# warm up phase 0
|
||||
ld $8,0($5) # first s1 limb
|
||||
|
||||
# warm up phase 1
|
||||
daddiu $5,$5,8
|
||||
dmultu $8,$7 # start first product; result arrives in HI/LO
|
||||
|
||||
daddiu $6,$6,-1
|
||||
beq $6,$0,$LC0 # size == 1: only the final phase is needed
|
||||
move $2,$0 # zero cy2
|
||||
|
||||
daddiu $6,$6,-1
|
||||
beq $6,$0,$LC1 # size == 2: skip the loop
|
||||
ld $8,0($5) # load new s1 limb as early as possible
|
||||
|
||||
Loop: mflo $10 # low product limb
|
||||
mfhi $9 # high product limb
|
||||
daddiu $5,$5,8
|
||||
daddu $10,$10,$2 # add old carry limb to low product limb
|
||||
dmultu $8,$7 # start the next product
|
||||
ld $8,0($5) # load new s1 limb as early as possible
|
||||
daddiu $6,$6,-1 # decrement loop counter
|
||||
sltu $2,$10,$2 # carry from previous addition -> $2
|
||||
sd $10,0($4) # store result limb
|
||||
daddiu $4,$4,8
|
||||
bne $6,$0,Loop # should be "bnel"
|
||||
daddu $2,$9,$2 # add high product limb and carry from addition
|
||||
|
||||
# cool down phase 1
|
||||
$LC1: mflo $10
|
||||
mfhi $9
|
||||
daddu $10,$10,$2
|
||||
sltu $2,$10,$2
|
||||
dmultu $8,$7 # start the last product
|
||||
sd $10,0($4)
|
||||
daddiu $4,$4,8
|
||||
daddu $2,$9,$2 # add high product limb and carry from addition
|
||||
|
||||
# cool down phase 0
|
||||
$LC0: mflo $10
|
||||
mfhi $9
|
||||
daddu $10,$10,$2
|
||||
sltu $2,$10,$2
|
||||
sd $10,0($4)
|
||||
j $31
|
||||
daddu $2,$9,$2 # add high product limb and carry from addition
|
||||
|
||||
.end __mpn_mul_1
|
91
sysdeps/mips/mips3/rshift.s
Normal file
91
sysdeps/mips/mips3/rshift.s
Normal file
@ -0,0 +1,91 @@
|
||||
# MIPS3 __mpn_rshift -- Shift a limb vector right by cnt bits and store the
# result in a second limb vector.
|
||||
|
||||
# Copyright (C) 1995 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Library General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr $4
|
||||
# src_ptr $5
|
||||
# size $6
|
||||
# cnt $7
|
||||
|
||||
# __mpn_rshift (MIPS3): shift the size-limb vector at src_ptr ($5) right by
# cnt ($7) bits, storing the 64-bit limbs at res_ptr ($4).  Limbs are processed
# from the least significant end upwards; the bits shifted out of the lowest
# limb are left in $2.  Branch delay slots are explicit (.set noreorder).
.text
|
||||
.align 2
|
||||
.globl __mpn_rshift
|
||||
.ent __mpn_rshift
|
||||
__mpn_rshift:
|
||||
.set noreorder
|
||||
.set nomacro
|
||||
|
||||
ld $10,0($5) # load first limb
|
||||
dsubu $13,$0,$7 # $13 = -cnt, used as the complementary shift count
|
||||
daddiu $6,$6,-1 # size-1: the last limb is handled at .Lend
|
||||
and $9,$6,4-1 # number of limbs in first loop
|
||||
beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop
|
||||
dsll $2,$10,$13 # compute function result
|
||||
|
||||
dsubu $6,$6,$9 # what remains for .Loop is a multiple of 4
|
||||
|
||||
.Loop0: ld $3,8($5) # next higher limb
|
||||
daddiu $4,$4,8
|
||||
daddiu $5,$5,8
|
||||
daddiu $9,$9,-1
|
||||
dsrl $11,$10,$7 # current limb shifted right
|
||||
dsll $12,$3,$13 # bits moving down from the next higher limb
|
||||
move $10,$3
|
||||
or $8,$11,$12 # combine into one result limb
|
||||
bne $9,$0,.Loop0
|
||||
sd $8,-8($4) # (delay slot) store result limb
|
||||
|
||||
.L0: beq $6,$0,.Lend # no groups of four left
|
||||
nop
|
||||
|
||||
.Loop: ld $3,8($5)
|
||||
daddiu $4,$4,32
|
||||
daddiu $6,$6,-4 # four limbs per iteration
|
||||
dsrl $11,$10,$7
|
||||
dsll $12,$3,$13
|
||||
|
||||
ld $10,16($5)
|
||||
dsrl $14,$3,$7
|
||||
or $8,$11,$12
|
||||
sd $8,-32($4)
|
||||
dsll $9,$10,$13
|
||||
|
||||
ld $3,24($5)
|
||||
dsrl $11,$10,$7
|
||||
or $8,$14,$9
|
||||
sd $8,-24($4)
|
||||
dsll $12,$3,$13
|
||||
|
||||
ld $10,32($5)
|
||||
dsrl $14,$3,$7
|
||||
or $8,$11,$12
|
||||
sd $8,-16($4)
|
||||
dsll $9,$10,$13
|
||||
|
||||
daddiu $5,$5,32
|
||||
or $8,$14,$9
|
||||
bgtz $6,.Loop
|
||||
sd $8,-8($4) # (delay slot) store fourth result limb
|
||||
|
||||
.Lend: dsrl $8,$10,$7 # highest limb: nothing above it to merge in
|
||||
j $31
|
||||
sd $8,0($4) # (delay slot) store highest result limb
|
||||
.end __mpn_rshift
|
119
sysdeps/mips/mips3/sub_n.s
Normal file
119
sysdeps/mips/mips3/sub_n.s
Normal file
@ -0,0 +1,119 @@
|
||||
# MIPS3 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
|
||||
# store difference in a third limb vector.
|
||||
|
||||
# Copyright (C) 1995 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Library General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr $4
|
||||
# s1_ptr $5
|
||||
# s2_ptr $6
|
||||
# size $7
|
||||
|
||||
# __mpn_sub_n (MIPS3): subtract the size-limb vector at s2_ptr ($6) from the
# one at s1_ptr ($5), store the 64-bit difference limbs at res_ptr ($4), and
# leave the final borrow in $2.  One limb pair is always loaded one step ahead
# of its use; the last pair is consumed at .Lend.
.text
|
||||
.align 2
|
||||
.globl __mpn_sub_n
|
||||
.ent __mpn_sub_n
|
||||
__mpn_sub_n:
|
||||
.set noreorder
|
||||
.set nomacro
|
||||
|
||||
ld $10,0($5) # preload first s1 limb
|
||||
ld $11,0($6) # preload first s2 limb
|
||||
|
||||
daddiu $7,$7,-1 # size-1: the last limb pair is handled at .Lend
|
||||
and $9,$7,4-1 # number of limbs in first loop
|
||||
beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop
|
||||
move $2,$0 # (delay slot) clear the borrow
|
||||
|
||||
dsubu $7,$7,$9 # what remains for .Loop is a multiple of 4
|
||||
|
||||
.Loop0: daddiu $9,$9,-1
|
||||
ld $12,8($5) # preload next s1 limb
|
||||
||||
daddu $11,$11,$2 # s2 limb + borrow-in
|
||||
ld $13,8($6) # preload next s2 limb
|
||||
||||
sltu $8,$11,$2 # set if (s2 limb + borrow-in) wrapped around
|
||||
dsubu $11,$10,$11 # s1 limb - (s2 limb + borrow-in)
|
||||
sltu $2,$10,$11 # borrow out: difference is larger than the s1 limb
|
||||
sd $11,0($4) # store difference limb
|
||||
or $2,$2,$8 # merge both possible borrows into $2
|
||||
|
||||
daddiu $5,$5,8
|
||||
daddiu $6,$6,8
|
||||
move $10,$12 # preloaded limbs become the current pair
|
||||
move $11,$13
|
||||
bne $9,$0,.Loop0
|
||||
daddiu $4,$4,8 # (delay slot) advance res_ptr
|
||||
|
||||
.L0: beq $7,$0,.Lend # no groups of four left
|
||||
nop
|
||||
|
||||
.Loop: daddiu $7,$7,-4 # four limbs per iteration
|
||||
|
||||
ld $12,8($5)
|
||||
daddu $11,$11,$2 # same subtract-with-borrow step as in .Loop0 ...
|
||||
ld $13,8($6)
|
||||
sltu $8,$11,$2
|
||||
dsubu $11,$10,$11
|
||||
sltu $2,$10,$11
|
||||
sd $11,0($4)
|
||||
or $2,$2,$8
|
||||
|
||||
ld $10,16($5)
|
||||
daddu $13,$13,$2 # ... alternating between register pairs $10/$11 and $12/$13
|
||||
ld $11,16($6)
|
||||
sltu $8,$13,$2
|
||||
dsubu $13,$12,$13
|
||||
sltu $2,$12,$13
|
||||
sd $13,8($4)
|
||||
or $2,$2,$8
|
||||
|
||||
ld $12,24($5)
|
||||
daddu $11,$11,$2
|
||||
ld $13,24($6)
|
||||
sltu $8,$11,$2
|
||||
dsubu $11,$10,$11
|
||||
sltu $2,$10,$11
|
||||
sd $11,16($4)
|
||||
or $2,$2,$8
|
||||
|
||||
ld $10,32($5)
|
||||
daddu $13,$13,$2
|
||||
ld $11,32($6)
|
||||
sltu $8,$13,$2
|
||||
dsubu $13,$12,$13
|
||||
sltu $2,$12,$13
|
||||
sd $13,24($4)
|
||||
or $2,$2,$8
|
||||
|
||||
daddiu $5,$5,32
|
||||
daddiu $6,$6,32
|
||||
|
||||
bne $7,$0,.Loop
|
||||
daddiu $4,$4,32 # (delay slot) advance res_ptr
|
||||
|
||||
.Lend: daddu $11,$11,$2 # final limb pair, already loaded in $10/$11
|
||||
sltu $8,$11,$2
|
||||
dsubu $11,$10,$11
|
||||
sltu $2,$10,$11
|
||||
sd $11,0($4)
|
||||
j $31
|
||||
or $2,$2,$8 # (delay slot) final borrow left in $2
|
||||
|
||||
.end __mpn_sub_n
|
96
sysdeps/mips/mips3/submul_1.s
Normal file
96
sysdeps/mips/mips3/submul_1.s
Normal file
@ -0,0 +1,96 @@
|
||||
# MIPS3 __mpn_submul_1 -- Multiply a limb vector with a single limb and
|
||||
# subtract the product from a second limb vector.
|
||||
|
||||
# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Library General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr $4
|
||||
# s1_ptr $5
|
||||
# size $6
|
||||
# s2_limb $7
|
||||
|
||||
# __mpn_submul_1 (MIPS3): multiply the size-limb vector at s1_ptr ($5) by
# s2_limb ($7), subtract the products from the limbs at res_ptr ($4) in place,
# and leave the final borrow limb in $2.  The multiply for the next limb is
# started while the previous product is still being subtracted.
.text
|
||||
.align 4
|
||||
.globl __mpn_submul_1
|
||||
.ent __mpn_submul_1
|
||||
__mpn_submul_1:
|
||||
.set noreorder
|
||||
.set nomacro
|
||||
|
||||
# warm up phase 0
|
||||
ld $8,0($5) # first s1 limb
|
||||
|
||||
# warm up phase 1
|
||||
daddiu $5,$5,8
|
||||
dmultu $8,$7 # start first product; result arrives in HI/LO
|
||||
|
||||
daddiu $6,$6,-1
|
||||
beq $6,$0,$LC0 # size == 1: only the final phase is needed
|
||||
move $2,$0 # zero cy2
|
||||
|
||||
daddiu $6,$6,-1
|
||||
beq $6,$0,$LC1 # size == 2: skip the loop
|
||||
ld $8,0($5) # load new s1 limb as early as possible
|
||||
|
||||
Loop: ld $10,0($4) # current res limb
|
||||
mflo $3 # low product limb
|
||||
mfhi $9 # high product limb
|
||||
daddiu $5,$5,8
|
||||
daddu $3,$3,$2 # add old carry limb to low product limb
|
||||
dmultu $8,$7 # start the next product
|
||||
ld $8,0($5) # load new s1 limb as early as possible
|
||||
daddiu $6,$6,-1 # decrement loop counter
|
||||
sltu $2,$3,$2 # carry from previous addition -> $2
|
||||
dsubu $3,$10,$3 # res limb - (low product limb + carry)
|
||||
sgtu $10,$3,$10 # borrow: difference is larger than the res limb
|
||||
daddu $2,$2,$10 # sum of carry and borrow
|
||||
sd $3,0($4) # store updated res limb
|
||||
daddiu $4,$4,8
|
||||
bne $6,$0,Loop # should be "bnel"
|
||||
daddu $2,$9,$2 # add high product limb and carry from addition
|
||||
|
||||
# cool down phase 1
|
||||
$LC1: ld $10,0($4)
|
||||
mflo $3
|
||||
mfhi $9
|
||||
daddu $3,$3,$2
|
||||
sltu $2,$3,$2
|
||||
dmultu $8,$7 # start the last product
|
||||
dsubu $3,$10,$3
|
||||
sgtu $10,$3,$10
|
||||
daddu $2,$2,$10
|
||||
sd $3,0($4)
|
||||
daddiu $4,$4,8
|
||||
daddu $2,$9,$2 # add high product limb and carry from addition
|
||||
|
||||
# cool down phase 0
|
||||
$LC0: ld $10,0($4)
|
||||
mflo $3
|
||||
mfhi $9
|
||||
daddu $3,$3,$2
|
||||
sltu $2,$3,$2
|
||||
dsubu $3,$10,$3
|
||||
sgtu $10,$3,$10
|
||||
daddu $2,$2,$10
|
||||
sd $3,0($4)
|
||||
j $31
|
||||
daddu $2,$9,$2 # add high product limb and carry from addition
|
||||
|
||||
.end __mpn_submul_1
|
84
sysdeps/mips/mul_1.s
Normal file
84
sysdeps/mips/mul_1.s
Normal file
@ -0,0 +1,84 @@
|
||||
# MIPS __mpn_mul_1 -- Multiply a limb vector with a single limb and
|
||||
# store the product in a second limb vector.
|
||||
|
||||
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Library General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr $4
|
||||
# s1_ptr $5
|
||||
# size $6
|
||||
# s2_limb $7
|
||||
|
||||
# __mpn_mul_1 (MIPS, 32-bit limbs): multiply the size-limb vector at s1_ptr
# ($5) by s2_limb ($7), store the low product limbs (plus running carry) at
# res_ptr ($4), and leave the final carry limb in $2.  The multiply for the
# next limb is started while the previous product is still being stored.
.text
|
||||
.align 4
|
||||
.globl __mpn_mul_1
|
||||
.ent __mpn_mul_1
|
||||
__mpn_mul_1:
|
||||
.set noreorder
|
||||
.set nomacro
|
||||
|
||||
# warm up phase 0
|
||||
lw $8,0($5) # first s1 limb
|
||||
|
||||
# warm up phase 1
|
||||
addiu $5,$5,4
|
||||
multu $8,$7 # start first product; result arrives in HI/LO
|
||||
|
||||
addiu $6,$6,-1
|
||||
beq $6,$0,$LC0 # size == 1: only the final phase is needed
|
||||
move $2,$0 # zero cy2
|
||||
|
||||
addiu $6,$6,-1
|
||||
beq $6,$0,$LC1 # size == 2: skip the loop
|
||||
lw $8,0($5) # load new s1 limb as early as possible
|
||||
|
||||
Loop: mflo $10 # low product limb
|
||||
mfhi $9 # high product limb
|
||||
addiu $5,$5,4
|
||||
addu $10,$10,$2 # add old carry limb to low product limb
|
||||
multu $8,$7 # start the next product
|
||||
lw $8,0($5) # load new s1 limb as early as possible
|
||||
addiu $6,$6,-1 # decrement loop counter
|
||||
sltu $2,$10,$2 # carry from previous addition -> $2
|
||||
sw $10,0($4) # store result limb
|
||||
addiu $4,$4,4
|
||||
bne $6,$0,Loop # should be "bnel"
|
||||
addu $2,$9,$2 # add high product limb and carry from addition
|
||||
|
||||
# cool down phase 1
|
||||
$LC1: mflo $10
|
||||
mfhi $9
|
||||
addu $10,$10,$2
|
||||
sltu $2,$10,$2
|
||||
multu $8,$7 # start the last product
|
||||
sw $10,0($4)
|
||||
addiu $4,$4,4
|
||||
addu $2,$9,$2 # add high product limb and carry from addition
|
||||
|
||||
# cool down phase 0
|
||||
$LC0: mflo $10
|
||||
mfhi $9
|
||||
addu $10,$10,$2
|
||||
sltu $2,$10,$2
|
||||
sw $10,0($4)
|
||||
j $31
|
||||
addu $2,$9,$2 # add high product limb and carry from addition
|
||||
|
||||
.end __mpn_mul_1
|
91
sysdeps/mips/rshift.s
Normal file
91
sysdeps/mips/rshift.s
Normal file
@ -0,0 +1,91 @@
|
||||
# MIPS2 __mpn_rshift -- Shift a limb vector right by cnt bits and store the
# result in a second limb vector.
|
||||
|
||||
# Copyright (C) 1995 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Library General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr $4
|
||||
# src_ptr $5
|
||||
# size $6
|
||||
# cnt $7
|
||||
|
||||
# __mpn_rshift (MIPS, 32-bit limbs): shift the size-limb vector at src_ptr ($5)
# right by cnt ($7) bits, storing the limbs at res_ptr ($4).  Limbs are
# processed from the least significant end upwards; the bits shifted out of
# the lowest limb are left in $2.  Branch delay slots are explicit.
.text
|
||||
.align 2
|
||||
.globl __mpn_rshift
|
||||
.ent __mpn_rshift
|
||||
__mpn_rshift:
|
||||
.set noreorder
|
||||
.set nomacro
|
||||
|
||||
lw $10,0($5) # load first limb
|
||||
subu $13,$0,$7 # $13 = -cnt, used as the complementary shift count
|
||||
addiu $6,$6,-1 # size-1: the last limb is handled at .Lend
|
||||
and $9,$6,4-1 # number of limbs in first loop
|
||||
beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop
|
||||
sll $2,$10,$13 # compute function result
|
||||
|
||||
subu $6,$6,$9 # what remains for .Loop is a multiple of 4
|
||||
|
||||
.Loop0: lw $3,4($5) # next higher limb
|
||||
addiu $4,$4,4
|
||||
addiu $5,$5,4
|
||||
addiu $9,$9,-1
|
||||
srl $11,$10,$7 # current limb shifted right
|
||||
sll $12,$3,$13 # bits moving down from the next higher limb
|
||||
move $10,$3
|
||||
or $8,$11,$12 # combine into one result limb
|
||||
bne $9,$0,.Loop0
|
||||
sw $8,-4($4) # (delay slot) store result limb
|
||||
|
||||
.L0: beq $6,$0,.Lend # no groups of four left
|
||||
nop
|
||||
|
||||
.Loop: lw $3,4($5)
|
||||
addiu $4,$4,16
|
||||
addiu $6,$6,-4 # four limbs per iteration
|
||||
srl $11,$10,$7
|
||||
sll $12,$3,$13
|
||||
|
||||
lw $10,8($5)
|
||||
srl $14,$3,$7
|
||||
or $8,$11,$12
|
||||
sw $8,-16($4)
|
||||
sll $9,$10,$13
|
||||
|
||||
lw $3,12($5)
|
||||
srl $11,$10,$7
|
||||
or $8,$14,$9
|
||||
sw $8,-12($4)
|
||||
sll $12,$3,$13
|
||||
|
||||
lw $10,16($5)
|
||||
srl $14,$3,$7
|
||||
or $8,$11,$12
|
||||
sw $8,-8($4)
|
||||
sll $9,$10,$13
|
||||
|
||||
addiu $5,$5,16
|
||||
or $8,$14,$9
|
||||
bgtz $6,.Loop
|
||||
sw $8,-4($4) # (delay slot) store fourth result limb
|
||||
|
||||
.Lend: srl $8,$10,$7 # highest limb: nothing above it to merge in
|
||||
j $31
|
||||
sw $8,0($4) # (delay slot) store highest result limb
|
||||
.end __mpn_rshift
|
119
sysdeps/mips/sub_n.s
Normal file
119
sysdeps/mips/sub_n.s
Normal file
@ -0,0 +1,119 @@
|
||||
# MIPS2 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
|
||||
# store difference in a third limb vector.
|
||||
|
||||
# Copyright (C) 1995 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Library General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr $4
|
||||
# s1_ptr $5
|
||||
# s2_ptr $6
|
||||
# size $7
|
||||
|
||||
# __mpn_sub_n (MIPS, 32-bit limbs): subtract the size-limb vector at s2_ptr
# ($6) from the one at s1_ptr ($5), store the difference limbs at res_ptr
# ($4), and leave the final borrow in $2.  One limb pair is always loaded one
# step ahead of its use; the last pair is consumed at .Lend.
.text
|
||||
.align 2
|
||||
.globl __mpn_sub_n
|
||||
.ent __mpn_sub_n
|
||||
__mpn_sub_n:
|
||||
.set noreorder
|
||||
.set nomacro
|
||||
|
||||
lw $10,0($5) # preload first s1 limb
|
||||
lw $11,0($6) # preload first s2 limb
|
||||
|
||||
addiu $7,$7,-1 # size-1: the last limb pair is handled at .Lend
|
||||
and $9,$7,4-1 # number of limbs in first loop
|
||||
beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop
|
||||
move $2,$0 # (delay slot) clear the borrow
|
||||
|
||||
subu $7,$7,$9 # what remains for .Loop is a multiple of 4
|
||||
|
||||
.Loop0: addiu $9,$9,-1
|
||||
lw $12,4($5) # preload next s1 limb
|
||||
||||
addu $11,$11,$2 # s2 limb + borrow-in
|
||||
lw $13,4($6) # preload next s2 limb
|
||||
||||
sltu $8,$11,$2 # set if (s2 limb + borrow-in) wrapped around
|
||||
subu $11,$10,$11 # s1 limb - (s2 limb + borrow-in)
|
||||
sltu $2,$10,$11 # borrow out: difference is larger than the s1 limb
|
||||
sw $11,0($4) # store difference limb
|
||||
or $2,$2,$8 # merge both possible borrows into $2
|
||||
|
||||
addiu $5,$5,4
|
||||
addiu $6,$6,4
|
||||
move $10,$12 # preloaded limbs become the current pair
|
||||
move $11,$13
|
||||
bne $9,$0,.Loop0
|
||||
addiu $4,$4,4 # (delay slot) advance res_ptr
|
||||
|
||||
.L0: beq $7,$0,.Lend # no groups of four left
|
||||
nop
|
||||
|
||||
.Loop: addiu $7,$7,-4 # four limbs per iteration
|
||||
|
||||
lw $12,4($5)
|
||||
addu $11,$11,$2 # same subtract-with-borrow step as in .Loop0 ...
|
||||
lw $13,4($6)
|
||||
sltu $8,$11,$2
|
||||
subu $11,$10,$11
|
||||
sltu $2,$10,$11
|
||||
sw $11,0($4)
|
||||
or $2,$2,$8
|
||||
|
||||
lw $10,8($5)
|
||||
addu $13,$13,$2 # ... alternating between register pairs $10/$11 and $12/$13
|
||||
lw $11,8($6)
|
||||
sltu $8,$13,$2
|
||||
subu $13,$12,$13
|
||||
sltu $2,$12,$13
|
||||
sw $13,4($4)
|
||||
or $2,$2,$8
|
||||
|
||||
lw $12,12($5)
|
||||
addu $11,$11,$2
|
||||
lw $13,12($6)
|
||||
sltu $8,$11,$2
|
||||
subu $11,$10,$11
|
||||
sltu $2,$10,$11
|
||||
sw $11,8($4)
|
||||
or $2,$2,$8
|
||||
|
||||
lw $10,16($5)
|
||||
addu $13,$13,$2
|
||||
lw $11,16($6)
|
||||
sltu $8,$13,$2
|
||||
subu $13,$12,$13
|
||||
sltu $2,$12,$13
|
||||
sw $13,12($4)
|
||||
or $2,$2,$8
|
||||
|
||||
addiu $5,$5,16
|
||||
addiu $6,$6,16
|
||||
|
||||
bne $7,$0,.Loop
|
||||
addiu $4,$4,16 # (delay slot) advance res_ptr
|
||||
|
||||
.Lend: addu $11,$11,$2 # final limb pair, already loaded in $10/$11
|
||||
sltu $8,$11,$2
|
||||
subu $11,$10,$11
|
||||
sltu $2,$10,$11
|
||||
sw $11,0($4)
|
||||
j $31
|
||||
or $2,$2,$8 # (delay slot) final borrow left in $2
|
||||
|
||||
.end __mpn_sub_n
|
96
sysdeps/mips/submul_1.s
Normal file
96
sysdeps/mips/submul_1.s
Normal file
@ -0,0 +1,96 @@
|
||||
# MIPS __mpn_submul_1 -- Multiply a limb vector with a single limb and
|
||||
# subtract the product from a second limb vector.
|
||||
|
||||
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Library General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr $4
|
||||
# s1_ptr $5
|
||||
# size $6
|
||||
# s2_limb $7
|
||||
|
||||
# __mpn_submul_1 (MIPS, 32-bit limbs): multiply the size-limb vector at s1_ptr
# ($5) by s2_limb ($7), subtract the products from the limbs at res_ptr ($4)
# in place, and leave the final borrow limb in $2.  The multiply for the next
# limb is started while the previous product is still being subtracted.
.text
|
||||
.align 4
|
||||
.globl __mpn_submul_1
|
||||
.ent __mpn_submul_1
|
||||
__mpn_submul_1:
|
||||
.set noreorder
|
||||
.set nomacro
|
||||
|
||||
# warm up phase 0
|
||||
lw $8,0($5) # first s1 limb
|
||||
|
||||
# warm up phase 1
|
||||
addiu $5,$5,4
|
||||
multu $8,$7 # start first product; result arrives in HI/LO
|
||||
|
||||
addiu $6,$6,-1
|
||||
beq $6,$0,$LC0 # size == 1: only the final phase is needed
|
||||
move $2,$0 # zero cy2
|
||||
|
||||
addiu $6,$6,-1
|
||||
beq $6,$0,$LC1 # size == 2: skip the loop
|
||||
lw $8,0($5) # load new s1 limb as early as possible
|
||||
|
||||
Loop: lw $10,0($4) # current res limb
|
||||
mflo $3 # low product limb
|
||||
mfhi $9 # high product limb
|
||||
addiu $5,$5,4
|
||||
addu $3,$3,$2 # add old carry limb to low product limb
|
||||
multu $8,$7 # start the next product
|
||||
lw $8,0($5) # load new s1 limb as early as possible
|
||||
addiu $6,$6,-1 # decrement loop counter
|
||||
sltu $2,$3,$2 # carry from previous addition -> $2
|
||||
subu $3,$10,$3 # res limb - (low product limb + carry)
|
||||
sgtu $10,$3,$10 # borrow: difference is larger than the res limb
|
||||
addu $2,$2,$10 # sum of carry and borrow
|
||||
sw $3,0($4) # store updated res limb
|
||||
addiu $4,$4,4
|
||||
bne $6,$0,Loop # should be "bnel"
|
||||
addu $2,$9,$2 # add high product limb and carry from addition
|
||||
|
||||
# cool down phase 1
|
||||
$LC1: lw $10,0($4)
|
||||
mflo $3
|
||||
mfhi $9
|
||||
addu $3,$3,$2
|
||||
sltu $2,$3,$2
|
||||
multu $8,$7 # start the last product
|
||||
subu $3,$10,$3
|
||||
sgtu $10,$3,$10
|
||||
addu $2,$2,$10
|
||||
sw $3,0($4)
|
||||
addiu $4,$4,4
|
||||
addu $2,$9,$2 # add high product limb and carry from addition
|
||||
|
||||
# cool down phase 0
|
||||
$LC0: lw $10,0($4)
|
||||
mflo $3
|
||||
mfhi $9
|
||||
addu $3,$3,$2
|
||||
sltu $2,$3,$2
|
||||
subu $3,$10,$3
|
||||
sgtu $10,$3,$10
|
||||
addu $2,$2,$10
|
||||
sw $3,0($4)
|
||||
j $31
|
||||
addu $2,$9,$2 # add high product limb and carry from addition
|
||||
|
||||
.end __mpn_submul_1
|
54
sysdeps/rs6000/add_n.s
Normal file
54
sysdeps/rs6000/add_n.s
Normal file
@ -0,0 +1,54 @@
|
||||
# IBM POWER __mpn_add_n -- Add two limb vectors of equal, non-zero length.
|
||||
|
||||
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Library General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr r3
|
||||
# s1_ptr r4
|
||||
# s2_ptr r5
|
||||
# size r6
|
||||
|
||||
# __mpn_add_n (IBM POWER): add the size-limb vectors at s1_ptr (r4) and s2_ptr
# (r5), store the sum limbs at res_ptr (r3), and return the final carry in r3.
# Each sum limb is stored one step after it is computed, while the next limb
# pair is being loaded.
.toc
|
||||
.extern __mpn_add_n[DS]
|
||||
.extern .__mpn_add_n
|
||||
.csect [PR]
|
||||
.align 2
|
||||
.globl __mpn_add_n
|
||||
.globl .__mpn_add_n
|
||||
.csect __mpn_add_n[DS]
|
||||
__mpn_add_n:
|
||||
.long .__mpn_add_n, TOC[tc0], 0
|
||||
.csect [PR]
|
||||
.__mpn_add_n:
|
||||
mtctr 6 # copy size into CTR
|
||||
l 8,0(4) # load least significant s1 limb
|
||||
l 0,0(5) # load least significant s2 limb
|
||||
cal 3,-4(3) # offset res_ptr, it's updated before used
|
||||
a 7,0,8 # add least significant limbs, set cy
|
||||
bdz Lend # If done, skip loop
|
||||
Loop: lu 8,4(4) # load s1 limb and update s1_ptr
|
||||
lu 0,4(5) # load s2 limb and update s2_ptr
|
||||
stu 7,4(3) # store previous limb in load latency slot
|
||||
ae 7,0,8 # add new limbs with cy, set cy
|
||||
bdn Loop # decrement CTR and loop back
|
||||
Lend: st 7,4(3) # store ultimate result limb
|
||||
lil 3,0 # load cy into ...
|
||||
aze 3,3 # ... return value register
|
||||
br
|
122
sysdeps/rs6000/addmul_1.s
Normal file
122
sysdeps/rs6000/addmul_1.s
Normal file
@ -0,0 +1,122 @@
|
||||
# IBM POWER __mpn_addmul_1 -- Multiply a limb vector with a limb and add
|
||||
# the result to a second limb vector.
|
||||
|
||||
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Library General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr r3
|
||||
# s1_ptr r4
|
||||
# size r5
|
||||
# s2_limb r6
|
||||
|
||||
# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To
|
||||
# obtain that operation, we have to use the 32x32->64 signed multiplication
|
||||
# instruction, and add the appropriate compensation to the high limb of the
|
||||
# result. We add the multiplicand if the multiplier has its most significant
|
||||
# bit set, and we add the multiplier if the multiplicand has its most
|
||||
# significant bit set. We need to preserve the carry flag between each
|
||||
# iteration, so we have to compute the compensation carefully (the natural,
|
||||
# srai+and doesn't work). Since the POWER architecture has a branch unit
|
||||
# we can branch in zero cycles, so that's how we perform the additions.
|
||||
|
||||
.toc
|
||||
.csect .__mpn_addmul_1[PR]
|
||||
.align 2
|
||||
.globl __mpn_addmul_1
|
||||
.globl .__mpn_addmul_1
|
||||
.csect __mpn_addmul_1[DS]
|
||||
__mpn_addmul_1:
|
||||
.long .__mpn_addmul_1[PR], TOC[tc0], 0
|
||||
.csect .__mpn_addmul_1[PR]
|
||||
.__mpn_addmul_1:
|
||||
|
||||
cal 3,-4(3)
|
||||
l 0,0(4)
|
||||
cmpi 0,6,0
|
||||
mtctr 5
|
||||
mul 9,0,6
|
||||
srai 7,0,31
|
||||
and 7,7,6
|
||||
mfmq 8
|
||||
cax 9,9,7
|
||||
l 7,4(3)
|
||||
a 8,8,7 # add res_limb
|
||||
blt Lneg
|
||||
Lpos: bdz Lend
|
||||
|
||||
Lploop: lu 0,4(4)
|
||||
stu 8,4(3)
|
||||
cmpi 0,0,0
|
||||
mul 10,0,6
|
||||
mfmq 0
|
||||
ae 8,0,9 # low limb + old_cy_limb + old cy
|
||||
l 7,4(3)
|
||||
aze 10,10 # propagate cy to new cy_limb
|
||||
a 8,8,7 # add res_limb
|
||||
bge Lp0
|
||||
cax 10,10,6 # adjust high limb for negative limb from s1
|
||||
Lp0: bdz Lend0
|
||||
lu 0,4(4)
|
||||
stu 8,4(3)
|
||||
cmpi 0,0,0
|
||||
mul 9,0,6
|
||||
mfmq 0
|
||||
ae 8,0,10
|
||||
l 7,4(3)
|
||||
aze 9,9
|
||||
a 8,8,7
|
||||
bge Lp1
|
||||
cax 9,9,6 # adjust high limb for negative limb from s1
|
||||
Lp1: bdn Lploop
|
||||
|
||||
b Lend
|
||||
|
||||
Lneg: cax 9,9,0
|
||||
bdz Lend
|
||||
Lnloop: lu 0,4(4)
|
||||
stu 8,4(3)
|
||||
cmpi 0,0,0
|
||||
mul 10,0,6
|
||||
mfmq 7
|
||||
ae 8,7,9
|
||||
l 7,4(3)
|
||||
ae 10,10,0 # propagate cy to new cy_limb
|
||||
a 8,8,7 # add res_limb
|
||||
bge Ln0
|
||||
cax 10,10,6 # adjust high limb for negative limb from s1
|
||||
Ln0: bdz Lend0
|
||||
lu 0,4(4)
|
||||
stu 8,4(3)
|
||||
cmpi 0,0,0
|
||||
mul 9,0,6
|
||||
mfmq 7
|
||||
ae 8,7,10
|
||||
l 7,4(3)
|
||||
ae 9,9,0 # propagate cy to new cy_limb
|
||||
a 8,8,7 # add res_limb
|
||||
bge Ln1
|
||||
cax 9,9,6 # adjust high limb for negative limb from s1
|
||||
Ln1: bdn Lnloop
|
||||
b Lend
|
||||
|
||||
Lend0: cal 9,0(10)
|
||||
Lend: st 8,4(3)
|
||||
aze 3,9
|
||||
br
|
58
sysdeps/rs6000/lshift.s
Normal file
58
sysdeps/rs6000/lshift.s
Normal file
@ -0,0 +1,58 @@
|
||||
# IBM POWER __mpn_lshift --
|
||||
|
||||
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Library General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr r3
|
||||
# s_ptr r4
|
||||
# size r5
|
||||
# cnt r6
|
||||
|
||||
.toc
|
||||
.extern __mpn_lshift[DS]
|
||||
.extern .__mpn_lshift
|
||||
.csect [PR]
|
||||
.align 2
|
||||
.globl __mpn_lshift
|
||||
.globl .__mpn_lshift
|
||||
.csect __mpn_lshift[DS]
|
||||
__mpn_lshift:
|
||||
.long .__mpn_lshift, TOC[tc0], 0
|
||||
.csect [PR]
|
||||
.__mpn_lshift:
|
||||
sli 0,5,2
|
||||
cax 9,3,0
|
||||
cax 4,4,0
|
||||
sfi 8,6,32
|
||||
mtctr 5 # put limb count in CTR loop register
|
||||
lu 0,-4(4) # read most significant limb
|
||||
sre 3,0,8 # compute carry out limb, and init MQ register
|
||||
bdz Lend2 # if just one limb, skip loop
|
||||
lu 0,-4(4) # read 2:nd most significant limb
|
||||
sreq 7,0,8 # compute most significant limb of result
|
||||
bdz Lend # if just two limb, skip loop
|
||||
Loop: lu 0,-4(4) # load next lower limb
|
||||
stu 7,-4(9) # store previous result during read latency
|
||||
sreq 7,0,8 # compute result limb
|
||||
bdn Loop # loop back until CTR is zero
|
||||
Lend: stu 7,-4(9) # store 2:nd least significant limb
|
||||
Lend2: sle 7,0,6 # compute least significant limb
|
||||
st 7,-4(9) # store it
|
||||
br
|
109
sysdeps/rs6000/mul_1.s
Normal file
109
sysdeps/rs6000/mul_1.s
Normal file
@ -0,0 +1,109 @@
|
||||
# IBM POWER __mpn_mul_1 -- Multiply a limb vector with a limb and store
|
||||
# the result in a second limb vector.
|
||||
|
||||
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Library General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr r3
|
||||
# s1_ptr r4
|
||||
# size r5
|
||||
# s2_limb r6
|
||||
|
||||
# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To
|
||||
# obtain that operation, we have to use the 32x32->64 signed multiplication
|
||||
# instruction, and add the appropriate compensation to the high limb of the
|
||||
# result. We add the multiplicand if the multiplier has its most significant
|
||||
# bit set, and we add the multiplier if the multiplicand has its most
|
||||
# significant bit set. We need to preserve the carry flag between each
|
||||
# iteration, so we have to compute the compensation carefully (the natural,
|
||||
# srai+and doesn't work). Since the POWER architecture has a branch unit
|
||||
# we can branch in zero cycles, so that's how we perform the additions.
|
||||
|
||||
.toc
|
||||
.csect .__mpn_mul_1[PR]
|
||||
.align 2
|
||||
.globl __mpn_mul_1
|
||||
.globl .__mpn_mul_1
|
||||
.csect __mpn_mul_1[DS]
|
||||
__mpn_mul_1:
|
||||
.long .__mpn_mul_1[PR], TOC[tc0], 0
|
||||
.csect .__mpn_mul_1[PR]
|
||||
.__mpn_mul_1:
|
||||
|
||||
cal 3,-4(3)
|
||||
l 0,0(4)
|
||||
cmpi 0,6,0
|
||||
mtctr 5
|
||||
mul 9,0,6
|
||||
srai 7,0,31
|
||||
and 7,7,6
|
||||
mfmq 8
|
||||
ai 0,0,0 # reset carry
|
||||
cax 9,9,7
|
||||
blt Lneg
|
||||
Lpos: bdz Lend
|
||||
Lploop: lu 0,4(4)
|
||||
stu 8,4(3)
|
||||
cmpi 0,0,0
|
||||
mul 10,0,6
|
||||
mfmq 0
|
||||
ae 8,0,9
|
||||
bge Lp0
|
||||
cax 10,10,6 # adjust high limb for negative limb from s1
|
||||
Lp0: bdz Lend0
|
||||
lu 0,4(4)
|
||||
stu 8,4(3)
|
||||
cmpi 0,0,0
|
||||
mul 9,0,6
|
||||
mfmq 0
|
||||
ae 8,0,10
|
||||
bge Lp1
|
||||
cax 9,9,6 # adjust high limb for negative limb from s1
|
||||
Lp1: bdn Lploop
|
||||
b Lend
|
||||
|
||||
Lneg: cax 9,9,0
|
||||
bdz Lend
|
||||
Lnloop: lu 0,4(4)
|
||||
stu 8,4(3)
|
||||
cmpi 0,0,0
|
||||
mul 10,0,6
|
||||
cax 10,10,0 # adjust high limb for negative s2_limb
|
||||
mfmq 0
|
||||
ae 8,0,9
|
||||
bge Ln0
|
||||
cax 10,10,6 # adjust high limb for negative limb from s1
|
||||
Ln0: bdz Lend0
|
||||
lu 0,4(4)
|
||||
stu 8,4(3)
|
||||
cmpi 0,0,0
|
||||
mul 9,0,6
|
||||
cax 9,9,0 # adjust high limb for negative s2_limb
|
||||
mfmq 0
|
||||
ae 8,0,10
|
||||
bge Ln1
|
||||
cax 9,9,6 # adjust high limb for negative limb from s1
|
||||
Ln1: bdn Lnloop
|
||||
b Lend
|
||||
|
||||
Lend0: cal 9,0(10)
|
||||
Lend: st 8,4(3)
|
||||
aze 3,9
|
||||
br
|
56
sysdeps/rs6000/rshift.s
Normal file
56
sysdeps/rs6000/rshift.s
Normal file
@ -0,0 +1,56 @@
|
||||
# IBM POWER __mpn_rshift --
|
||||
|
||||
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Library General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr r3
|
||||
# s_ptr r4
|
||||
# size r5
|
||||
# cnt r6
|
||||
|
||||
.toc
|
||||
.extern __mpn_rshift[DS]
|
||||
.extern .__mpn_rshift
|
||||
.csect [PR]
|
||||
.align 2
|
||||
.globl __mpn_rshift
|
||||
.globl .__mpn_rshift
|
||||
.csect __mpn_rshift[DS]
|
||||
__mpn_rshift:
|
||||
.long .__mpn_rshift, TOC[tc0], 0
|
||||
.csect [PR]
|
||||
.__mpn_rshift:
|
||||
sfi 8,6,32
|
||||
mtctr 5 # put limb count in CTR loop register
|
||||
l 0,0(4) # read least significant limb
|
||||
ai 9,3,-4 # adjust res_ptr since it's offset in the stu:s
|
||||
sle 3,0,8 # compute carry limb, and init MQ register
|
||||
bdz Lend2 # if just one limb, skip loop
|
||||
lu 0,4(4) # read 2:nd least significant limb
|
||||
sleq 7,0,8 # compute least significant limb of result
|
||||
bdz Lend # if just two limb, skip loop
|
||||
Loop: lu 0,4(4) # load next higher limb
|
||||
stu 7,4(9) # store previous result during read latency
|
||||
sleq 7,0,8 # compute result limb
|
||||
bdn Loop # loop back until CTR is zero
|
||||
Lend: stu 7,4(9) # store 2:nd most significant limb
|
||||
Lend2: sre 7,0,6 # compute most significant limb
|
||||
st 7,4(9) # store it
|
||||
br
|
55
sysdeps/rs6000/sub_n.s
Normal file
55
sysdeps/rs6000/sub_n.s
Normal file
@ -0,0 +1,55 @@
|
||||
# IBM POWER __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
|
||||
# store difference in a third limb vector.
|
||||
|
||||
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Library General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr r3
|
||||
# s1_ptr r4
|
||||
# s2_ptr r5
|
||||
# size r6
|
||||
|
||||
.toc
|
||||
.extern __mpn_sub_n[DS]
|
||||
.extern .__mpn_sub_n
|
||||
.csect [PR]
|
||||
.align 2
|
||||
.globl __mpn_sub_n
|
||||
.globl .__mpn_sub_n
|
||||
.csect __mpn_sub_n[DS]
|
||||
__mpn_sub_n:
|
||||
.long .__mpn_sub_n, TOC[tc0], 0
|
||||
.csect [PR]
|
||||
.__mpn_sub_n:
|
||||
mtctr 6 # copy size into CTR
|
||||
l 8,0(4) # load least significant s1 limb
|
||||
l 0,0(5) # load least significant s2 limb
|
||||
cal 3,-4(3) # offset res_ptr, it's updated before used
|
||||
sf 7,0,8 # subtract least significant limbs, set cy
|
||||
bdz Lend # If done, skip loop
|
||||
Loop: lu 8,4(4) # load s1 limb and update s1_ptr
|
||||
lu 0,4(5) # load s2 limb and update s2_ptr
|
||||
stu 7,4(3) # store previous limb in load latency slot
|
||||
sfe 7,0,8 # subtract new limbs with cy, set cy
|
||||
bdn Loop # decrement CTR and loop back
|
||||
Lend: st 7,4(3) # store ultimate result limb
|
||||
sfe 3,0,0 # load !cy into ...
|
||||
sfi 3,3,0 # ... return value register
|
||||
br
|
127
sysdeps/rs6000/submul_1.s
Normal file
127
sysdeps/rs6000/submul_1.s
Normal file
@ -0,0 +1,127 @@
|
||||
# IBM POWER __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
|
||||
# the result from a second limb vector.
|
||||
|
||||
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Library General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr r3
|
||||
# s1_ptr r4
|
||||
# size r5
|
||||
# s2_limb r6
|
||||
|
||||
# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To
|
||||
# obtain that operation, we have to use the 32x32->64 signed multiplication
|
||||
# instruction, and add the appropriate compensation to the high limb of the
|
||||
# result. We add the multiplicand if the multiplier has its most significant
|
||||
# bit set, and we add the multiplier if the multiplicand has its most
|
||||
# significant bit set. We need to preserve the carry flag between each
|
||||
# iteration, so we have to compute the compensation carefully (the natural,
|
||||
# srai+and doesn't work). Since the POWER architecture has a branch unit
|
||||
# we can branch in zero cycles, so that's how we perform the additions.
|
||||
|
||||
.toc
|
||||
.csect .__mpn_submul_1[PR]
|
||||
.align 2
|
||||
.globl __mpn_submul_1
|
||||
.globl .__mpn_submul_1
|
||||
.csect __mpn_submul_1[DS]
|
||||
__mpn_submul_1:
|
||||
.long .__mpn_submul_1[PR], TOC[tc0], 0
|
||||
.csect .__mpn_submul_1[PR]
|
||||
.__mpn_submul_1:
|
||||
|
||||
cal 3,-4(3)
|
||||
l 0,0(4)
|
||||
cmpi 0,6,0
|
||||
mtctr 5
|
||||
mul 9,0,6
|
||||
srai 7,0,31
|
||||
and 7,7,6
|
||||
mfmq 11
|
||||
cax 9,9,7
|
||||
l 7,4(3)
|
||||
sf 8,11,7 # subtract from res_limb
|
||||
a 11,8,11 # invert cy (r11 is junk)
|
||||
blt Lneg
|
||||
Lpos: bdz Lend
|
||||
|
||||
Lploop: lu 0,4(4)
|
||||
stu 8,4(3)
|
||||
cmpi 0,0,0
|
||||
mul 10,0,6
|
||||
mfmq 0
|
||||
ae 11,0,9 # low limb + old_cy_limb + old cy
|
||||
l 7,4(3)
|
||||
aze 10,10 # propagate cy to new cy_limb
|
||||
sf 8,11,7 # subtract from res_limb
|
||||
a 11,8,11 # invert cy (r11 is junk)
|
||||
bge Lp0
|
||||
cax 10,10,6 # adjust high limb for negative limb from s1
|
||||
Lp0: bdz Lend0
|
||||
lu 0,4(4)
|
||||
stu 8,4(3)
|
||||
cmpi 0,0,0
|
||||
mul 9,0,6
|
||||
mfmq 0
|
||||
ae 11,0,10
|
||||
l 7,4(3)
|
||||
aze 9,9
|
||||
sf 8,11,7
|
||||
a 11,8,11 # invert cy (r11 is junk)
|
||||
bge Lp1
|
||||
cax 9,9,6 # adjust high limb for negative limb from s1
|
||||
Lp1: bdn Lploop
|
||||
|
||||
b Lend
|
||||
|
||||
Lneg: cax 9,9,0
|
||||
bdz Lend
|
||||
Lnloop: lu 0,4(4)
|
||||
stu 8,4(3)
|
||||
cmpi 0,0,0
|
||||
mul 10,0,6
|
||||
mfmq 7
|
||||
ae 11,7,9
|
||||
l 7,4(3)
|
||||
ae 10,10,0 # propagate cy to new cy_limb
|
||||
sf 8,11,7 # subtract from res_limb
|
||||
a 11,8,11 # invert cy (r11 is junk)
|
||||
bge Ln0
|
||||
cax 10,10,6 # adjust high limb for negative limb from s1
|
||||
Ln0: bdz Lend0
|
||||
lu 0,4(4)
|
||||
stu 8,4(3)
|
||||
cmpi 0,0,0
|
||||
mul 9,0,6
|
||||
mfmq 7
|
||||
ae 11,7,10
|
||||
l 7,4(3)
|
||||
ae 9,9,0 # propagate cy to new cy_limb
|
||||
sf 8,11,7 # subtract from res_limb
|
||||
a 11,8,11 # invert cy (r11 is junk)
|
||||
bge Ln1
|
||||
cax 9,9,6 # adjust high limb for negative limb from s1
|
||||
Ln1: bdn Lnloop
|
||||
b Lend
|
||||
|
||||
Lend0: cal 9,0(10)
|
||||
Lend: st 8,4(3)
|
||||
aze 3,9
|
||||
br
|
@ -1,7 +1,7 @@
|
||||
! sparc __mpn_add_n -- Add two limb vectors of the same length > 0 and store
|
||||
! sum in a third limb vector.
|
||||
|
||||
! Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||
! Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
|
||||
|
||||
! This file is part of the GNU MP Library.
|
||||
|
||||
@ -39,20 +39,25 @@ C_SYMBOL_NAME(__mpn_add_n):
|
||||
sub %g0,%o3,%o3
|
||||
andcc %o3,(16-1),%o3
|
||||
be Lzero
|
||||
nop
|
||||
mov %o4,%g2 ! put first s1_limb in g2 too
|
||||
|
||||
sll %o3,2,%o3 ! multiply by 4
|
||||
sub %o0,%o3,%o0 ! adjust res_ptr
|
||||
sub %o1,%o3,%o1 ! adjust s1_ptr
|
||||
sub %o2,%o3,%o2 ! adjust s2_ptr
|
||||
|
||||
mov %o4,%g2
|
||||
|
||||
#if PIC
|
||||
mov %o7,%g4 ! Save return address register
|
||||
call 1f
|
||||
add %o7,Lbase-1f,%g3
|
||||
1: mov %g4,%o7 ! Restore return address register
|
||||
#else
|
||||
sethi %hi(Lbase),%g3
|
||||
or %g3,%lo(Lbase),%g3
|
||||
#endif
|
||||
sll %o3,2,%o3 ! multiply by 4
|
||||
jmp %g3+%o3
|
||||
mov %o5,%g3
|
||||
mov %o5,%g3 ! put first s2_limb in g3 too
|
||||
|
||||
Loop: addxcc %g2,%g3,%o3
|
||||
add %o1,64,%o1
|
||||
|
@ -37,8 +37,15 @@ C_SYMBOL_NAME(__mpn_addmul_1):
|
||||
|
||||
sll %o2,4,%g1
|
||||
and %g1,(4-1)<<4,%g1
|
||||
#if PIC
|
||||
mov %o7,%g4 ! Save return address register
|
||||
call 1f
|
||||
add %o7,LL-1f,%g3
|
||||
1: mov %g4,%o7 ! Restore return address register
|
||||
#else
|
||||
sethi %hi(LL),%g3
|
||||
or %g3,%lo(LL),%g3
|
||||
#endif
|
||||
jmp %g3+%g1
|
||||
nop
|
||||
LL:
|
||||
|
@ -34,8 +34,15 @@
|
||||
C_SYMBOL_NAME(__mpn_mul_1):
|
||||
sll %o2,4,%g1
|
||||
and %g1,(4-1)<<4,%g1
|
||||
#if PIC
|
||||
mov %o7,%g4 ! Save return address register
|
||||
call 1f
|
||||
add %o7,LL-1f,%g3
|
||||
1: mov %g4,%o7 ! Restore return address register
|
||||
#else
|
||||
sethi %hi(LL),%g3
|
||||
or %g3,%lo(LL),%g3
|
||||
#endif
|
||||
jmp %g3+%g1
|
||||
ld [%o1+0],%o4 ! 1
|
||||
LL:
|
||||
|
@ -1,7 +1,7 @@
|
||||
! sparc __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
|
||||
! store difference in a third limb vector.
|
||||
|
||||
! Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||
! Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
|
||||
|
||||
! This file is part of the GNU MP Library.
|
||||
|
||||
@ -39,20 +39,25 @@ C_SYMBOL_NAME(__mpn_sub_n):
|
||||
sub %g0,%o3,%o3
|
||||
andcc %o3,(16-1),%o3
|
||||
be Lzero
|
||||
nop
|
||||
mov %o4,%g2 ! put first s1_limb in g2 too
|
||||
|
||||
sll %o3,2,%o3 ! multiply by 4
|
||||
sub %o0,%o3,%o0 ! adjust res_ptr
|
||||
sub %o1,%o3,%o1 ! adjust s1_ptr
|
||||
sub %o2,%o3,%o2 ! adjust s2_ptr
|
||||
|
||||
mov %o4,%g2
|
||||
|
||||
#if PIC
|
||||
mov %o7,%g4 ! Save return address register
|
||||
call 1f
|
||||
add %o7,Lbase-1f,%g3
|
||||
1: mov %g4,%o7 ! Restore return address register
|
||||
#else
|
||||
sethi %hi(Lbase),%g3
|
||||
or %g3,%lo(Lbase),%g3
|
||||
#endif
|
||||
sll %o3,2,%o3 ! multiply by 4
|
||||
jmp %g3+%o3
|
||||
mov %o5,%g3
|
||||
mov %o5,%g3 ! put first s2_limb in g3 too
|
||||
|
||||
Loop: subxcc %g2,%g3,%o3
|
||||
add %o1,64,%o1
|
||||
|
@ -1,2 +1,3 @@
|
||||
sys/socketcall.h
|
||||
sys/timex.h
|
||||
nfs/nfs.h
|
||||
|
@ -20,7 +20,11 @@ sysdep_routines := $(sysdep_routines) ipc
|
||||
endif
|
||||
|
||||
ifeq ($(subdir), socket)
|
||||
headers += sys/socketcall.h
|
||||
headers += sys/socketcall.h
|
||||
endif
|
||||
|
||||
ifeq ($(subdir), sunrpc)
|
||||
headers += nfs/nfs.h
|
||||
endif
|
||||
|
||||
config-LDFLAGS = -Wl,-dynamic-linker=/lib/ld-gnu.so.1
|
||||
|
@ -93,43 +93,61 @@ Cambridge, MA 02139, USA. */
|
||||
(2 * movl is less expensive than pushl + popl).
|
||||
|
||||
Second unlike for the other registers we don't save the content of
|
||||
%ecx and %edx when we have than 1 and 2 registers resp. */
|
||||
%ecx and %edx when we have more than 1 and 2 registers resp.
|
||||
|
||||
The code below might look a bit long but we have to take care for
|
||||
the pipelined processors (i586 and up). Here the `pushl' and `popl'
|
||||
instructions are marked as NP (not pairable) but the exception is
|
||||
two consecutive instructions of this kind. This gives no penalty on
|
||||
i386 and i486 processors though. */
|
||||
|
||||
#undef DO_CALL
|
||||
#define DO_CALL(args) \
|
||||
PUSHARGS_##args \
|
||||
DOARGS_##args \
|
||||
int $0x80; \
|
||||
UNDOARGS_##args
|
||||
int $0x80 \
|
||||
POPARGS_##args
|
||||
|
||||
#define PUSHARGS_0 /* No arguments to push. */
|
||||
#define DOARGS_0 /* No arguments to frob. */
|
||||
#define UNDOARGS_0 /* No arguments to unfrob. */
|
||||
#define _DOARGS_0(n) /* No arguments to frob. */
|
||||
#define _UNDOARGS_0 /* No arguments to unfrob. */
|
||||
#define POPARGS_0 /* No arguments to pop. */
|
||||
#define _PUSHARGS_0 /* No arguments to push. */
|
||||
#define _DOARGS_0(n) /* No arguments to frob. */
|
||||
#define _POPARGS_0 /* No arguments to pop. */
|
||||
|
||||
#define DOARGS_1 movl %ebx, %edx; movl 4(%esp), %ebx; DOARGS_0
|
||||
#define UNDOARGS_1 UNDOARGS_0; movl %edx, %ebx
|
||||
#define _DOARGS_1(n) pushl %ebx; movl n+4(%esp), %ebx; _DOARGS_0 (n)
|
||||
#define _UNDOARGS_1 _UNDOARGS_0; popl %ebx
|
||||
#define PUSHARGS_1 movl %ebx, %edx; PUSHARGS_0
|
||||
#define DOARGS_1 _DOARGS_1 (4)
|
||||
#define POPARGS_1 POPARGS_0; movl %edx, %ebx
|
||||
#define _PUSHARGS_1 pushl %ebx; _PUSHARGS_0
|
||||
#define _DOARGS_1(n) movl n(%esp), %ebx; _DOARGS_0(n-4)
|
||||
#define _POPARGS_1 _POPARGS_0; popl %ebx
|
||||
|
||||
#define DOARGS_2 movl 8(%esp), %ecx; DOARGS_1
|
||||
#define UNDOARGS_2 UNDOARGS_1
|
||||
#define PUSHARGS_2 PUSHARGS_1
|
||||
#define DOARGS_2 _DOARGS_2 (8)
|
||||
#define POPARGS_2 POPARGS_1
|
||||
#define _PUSHARGS_2 _PUSHARGS_1
|
||||
#define _DOARGS_2(n) movl n(%esp), %ecx; _DOARGS_1 (n-4)
|
||||
#define _UNDOARGS_2 _UNDOARGS_1
|
||||
#define _POPARGS_2 _POPARGS_1
|
||||
|
||||
#define DOARGS_3 _DOARGS_3 (12)
|
||||
#define UNDOARGS_3 _UNDOARGS_3
|
||||
#define PUSHARGS_3 _PUSHARGS_2
|
||||
#define DOARGS_3 _DOARGS_3 (16)
|
||||
#define POPARGS_3 _POPARGS_3
|
||||
#define _PUSHARGS_3 _PUSHARGS_2
|
||||
#define _DOARGS_3(n) movl n(%esp), %edx; _DOARGS_2 (n-4)
|
||||
#define _UNDOARGS_3 _UNDOARGS_2
|
||||
#define _POPARGS_3 _POPARGS_2
|
||||
|
||||
#define DOARGS_4 _DOARGS_4 (16)
|
||||
#define UNDOARGS_4 _UNDOARGS_4
|
||||
#define _DOARGS_4(n) pushl %esi; movl n+4(%esp), %esi; _DOARGS_3 (n)
|
||||
#define _UNDOARGS_4 _UNDOARGS_3; popl %esi
|
||||
|
||||
#define DOARGS_5 _DOARGS_5 (20)
|
||||
#define UNDOARGS_5 _UNDOARGS_5
|
||||
#define _DOARGS_5(n) pushl %edi; movl n+4(%esp), %edi; _DOARGS_4 (n)
|
||||
#define _UNDOARGS_5 _UNDOARGS_4; popl %edi
|
||||
#define PUSHARGS_4 _PUSHARGS_4
|
||||
#define DOARGS_4 _DOARGS_4 (24)
|
||||
#define POPARGS_4 _POPARGS_4
|
||||
#define _PUSHARGS_4 pushl %esi; _PUSHARGS_3
|
||||
#define _DOARGS_4(n) movl n(%esp), %esi; _DOARGS_3 (n-4)
|
||||
#define _POPARGS_4 _POPARGS_3; popl %esi
|
||||
|
||||
#define PUSHARGS_5 _PUSHARGS_5
|
||||
#define DOARGS_5 _DOARGS_5 (32)
|
||||
#define POPARGS_5 _POPARGS_5
|
||||
#define _PUSHARGS_5 pushl %edi; _PUSHARGS_4
|
||||
#define _DOARGS_5(n) movl n(%esp), %edi; _DOARGS_4 (n-4)
|
||||
#define _POPARGS_5 _POPARGS_4; popl %edi
|
||||
|
||||
#endif /* ASSEMBLER */
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* Minimum guaranteed maximum values for system limits. Hurd version.
|
||||
/* Minimum guaranteed maximum values for system limits. Linux version.
|
||||
|
||||
Copyright (C) 1993, 1994 Free Software Foundation, Inc.
|
||||
Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
@ -18,14 +18,5 @@ License along with the GNU C Library; see the file COPYING.LIB. If
|
||||
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
|
||||
Cambridge, MA 02139, USA. */
|
||||
|
||||
/* Linux has a fixed limit of supplementary groups allocated with a
|
||||
process. This value is determined by the size of the `groups'
|
||||
member of the `task_struct' structure in <linux/sched.h>. */
|
||||
|
||||
#define NGROUPS_MAX 32
|
||||
|
||||
|
||||
/* Maximum size of file names. Not all file system types support
|
||||
this size but it is only a maximum value. */
|
||||
|
||||
#define NAME_MAX 255
|
||||
/* The kernel sources contain a file with all the needed information. */
|
||||
#include <linux/limits.h>
|
||||
|
1
sysdeps/unix/sysv/linux/nfs/nfs.h
Normal file
1
sysdeps/unix/sysv/linux/nfs/nfs.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/nfs.h>
|
@ -1,3 +1,21 @@
|
||||
/* Copyright (C) 1995 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public
|
||||
License along with the GNU C Library; see the file COPYING.LIB. If
|
||||
not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
#ifndef _SYS_PARAM_H
|
||||
#define _SYS_PARAM_H
|
||||
|
||||
@ -7,26 +25,21 @@
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
/* Don't change it. H.J. */
|
||||
#ifdef OLD_LINUX
|
||||
#undef MAXHOSTNAMELEN
|
||||
#define MAXHOSTNAMELEN 8 /* max length of hostname */
|
||||
#endif
|
||||
|
||||
#ifndef howmany
|
||||
#define howmany(x, y) (((x)+((y)-1))/(y))
|
||||
# define howmany(x, y) (((x)+((y)-1))/(y))
|
||||
#endif
|
||||
|
||||
#ifndef roundup
|
||||
#define roundup(x, y) ((((x)+((y)-1))/(y))*(y))
|
||||
# define roundup(x, y) ((((x)+((y)-1))/(y))*(y))
|
||||
#endif
|
||||
|
||||
#define MAXPATHLEN PATH_MAX
|
||||
#define NOFILE OPEN_MAX
|
||||
|
||||
/* Following the information of some of the kernel people I here assume
|
||||
* that block size (i.e. the value of stat.st_blocks) for all filesystem
|
||||
* is 512 bytes. If not tell me or HJ. -- Uli */
|
||||
that block size (i.e. the value of stat.st_blocks) for all filesystem
|
||||
is 512 bytes. If not tell HJ, Roland, or me. -- drepper */
|
||||
#define DEV_BSIZE 512
|
||||
|
||||
#endif
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user