Rename soft-fp op-[1248].h variables to avoid risk of shadowing.

This patch continues the soft-fp variable renaming from
<https://sourceware.org/ml/libc-alpha/2014-06/msg00434.html>, this
time renaming variables in op-[1248].h.  The renaming avoids shadowing,
which can occur when two macros use the same local variable name and
that variable appears in an argument that one of those macros passes to
the other.  (The two patches touch different files and are independent
of each other.)

Tested for powerpc32 (soft-float) and mips64 that this makes no change
to the disassembly of installed shared libraries.

	* soft-fp/op-1.h (_FP_UNPACK_RAW_1): Rename local variables to
	include macro name.
	(_FP_UNPACK_RAW_1_P): Likewise.
	(_FP_PACK_RAW_1): Likewise.
	(_FP_PACK_RAW_1_P): Likewise.
	(_FP_MUL_MEAT_1_wide): Likewise.
	(_FP_MUL_MEAT_DW_1_hard): Likewise.
	(_FP_MUL_MEAT_1_hard): Likewise.
	(_FP_DIV_MEAT_1_imm): Likewise.
	(_FP_DIV_MEAT_1_udiv_norm): Likewise.
	(_FP_DIV_MEAT_1_udiv): Likewise.
	* soft-fp/op-2.h (__FP_FRAC_DEC_2): Likewise.
	(_FP_UNPACK_RAW_2): Likewise.
	(_FP_UNPACK_RAW_2_P): Likewise.
	(_FP_PACK_RAW_2): Likewise.
	(_FP_PACK_RAW_2_P): Likewise.
	(_FP_MUL_MEAT_DW_2_wide): Likewise.
	(_FP_MUL_MEAT_2_wide): Likewise.
	(_FP_MUL_MEAT_DW_2_wide_3mul): Likewise.
	(_FP_MUL_MEAT_2_wide_3mul): Likewise.
	(_FP_MUL_MEAT_DW_2_gmp): Likewise.
	(_FP_MUL_MEAT_2_gmp): Likewise.
	(_FP_DIV_MEAT_2_udiv): Likewise.
	* soft-fp/op-4.h (_FP_FRAC_SLL_4): Likewise.
	(_FP_FRAC_SRL_4): Likewise.
	(_FP_FRAC_SRST_4): Likewise.
	(_FP_FRAC_SRS_4): Likewise.
	(_FP_UNPACK_RAW_4): Likewise.
	(_FP_UNPACK_RAW_4_P): Likewise.
	(_FP_PACK_RAW_4): Likewise.
	(_FP_PACK_RAW_4_P): Likewise.
	(_FP_MUL_MEAT_DW_4_wide): Likewise.
	(_FP_MUL_MEAT_4_wide): Likewise.
	(_FP_MUL_MEAT_4_gmp): Likewise.
	(umul_ppppmnnn): Likewise.
	(_FP_DIV_MEAT_4_udiv): Likewise.
	(__FP_FRAC_ADD_4): Likewise.
	(__FP_FRAC_SUB_3): Likewise.
	(__FP_FRAC_SUB_4): Likewise.
	(__FP_FRAC_DEC_3): Likewise.
	(__FP_FRAC_DEC_4): Likewise.
	(__FP_FRAC_ADDI_4): Likewise.
	* soft-fp/op-8.h (_FP_FRAC_SLL_8): Likewise.
	(_FP_FRAC_SRL_8): Likewise.
	(_FP_FRAC_SRS_8): Likewise.
This commit is contained in:
Joseph Myers 2014-06-30 17:37:10 +00:00
parent 9c37ec0b89
commit 3a6e988706
5 changed files with 772 additions and 510 deletions

View File

@ -1,5 +1,51 @@
2014-06-30 Joseph Myers <joseph@codesourcery.com>
* soft-fp/op-1.h (_FP_UNPACK_RAW_1): Rename local variables to
include macro name.
(_FP_UNPACK_RAW_1_P): Likewise.
(_FP_PACK_RAW_1): Likewise.
(_FP_PACK_RAW_1_P): Likewise.
(_FP_MUL_MEAT_1_wide): Likewise.
(_FP_MUL_MEAT_DW_1_hard): Likewise.
(_FP_MUL_MEAT_1_hard): Likewise.
(_FP_DIV_MEAT_1_imm): Likewise.
(_FP_DIV_MEAT_1_udiv_norm): Likewise.
(_FP_DIV_MEAT_1_udiv): Likewise.
* soft-fp/op-2.h (__FP_FRAC_DEC_2): Likewise.
(_FP_UNPACK_RAW_2): Likewise.
(_FP_UNPACK_RAW_2_P): Likewise.
(_FP_PACK_RAW_2): Likewise.
(_FP_PACK_RAW_2_P): Likewise.
(_FP_MUL_MEAT_DW_2_wide): Likewise.
(_FP_MUL_MEAT_2_wide): Likewise.
(_FP_MUL_MEAT_DW_2_wide_3mul): Likewise.
(_FP_MUL_MEAT_2_wide_3mul): Likewise.
(_FP_MUL_MEAT_DW_2_gmp): Likewise.
(_FP_MUL_MEAT_2_gmp): Likewise.
(_FP_DIV_MEAT_2_udiv): Likewise.
* soft-fp/op-4.h (_FP_FRAC_SLL_4): Likewise.
(_FP_FRAC_SRL_4): Likewise.
(_FP_FRAC_SRST_4): Likewise.
(_FP_FRAC_SRS_4): Likewise.
(_FP_UNPACK_RAW_4): Likewise.
(_FP_UNPACK_RAW_4_P): Likewise.
(_FP_PACK_RAW_4): Likewise.
(_FP_PACK_RAW_4_P): Likewise.
(_FP_MUL_MEAT_DW_4_wide): Likewise.
(_FP_MUL_MEAT_4_wide): Likewise.
(_FP_MUL_MEAT_4_gmp): Likewise.
(umul_ppppmnnn): Likewise.
(_FP_DIV_MEAT_4_udiv): Likewise.
(__FP_FRAC_ADD_4): Likewise.
(__FP_FRAC_SUB_3): Likewise.
(__FP_FRAC_SUB_4): Likewise.
(__FP_FRAC_DEC_3): Likewise.
(__FP_FRAC_DEC_4): Likewise.
(__FP_FRAC_ADDI_4): Likewise.
* soft-fp/op-8.h (_FP_FRAC_SLL_8): Likewise.
(_FP_FRAC_SRL_8): Likewise.
(_FP_FRAC_SRS_8): Likewise.
* soft-fp/extended.h (FP_UNPACK_RAW_E): Rename local variables to
include macro name.
(FP_UNPACK_RAW_EP): Likewise.

View File

@ -95,23 +95,24 @@
#define _FP_UNPACK_RAW_1(fs, X, val) \
do \
{ \
union _FP_UNION_##fs _flo; \
_flo.flt = (val); \
union _FP_UNION_##fs _FP_UNPACK_RAW_1_flo; \
_FP_UNPACK_RAW_1_flo.flt = (val); \
\
X##_f = _flo.bits.frac; \
X##_e = _flo.bits.exp; \
X##_s = _flo.bits.sign; \
X##_f = _FP_UNPACK_RAW_1_flo.bits.frac; \
X##_e = _FP_UNPACK_RAW_1_flo.bits.exp; \
X##_s = _FP_UNPACK_RAW_1_flo.bits.sign; \
} \
while (0)
#define _FP_UNPACK_RAW_1_P(fs, X, val) \
do \
{ \
union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val); \
union _FP_UNION_##fs *_FP_UNPACK_RAW_1_P_flo \
= (union _FP_UNION_##fs *) (val); \
\
X##_f = _flo->bits.frac; \
X##_e = _flo->bits.exp; \
X##_s = _flo->bits.sign; \
X##_f = _FP_UNPACK_RAW_1_P_flo->bits.frac; \
X##_e = _FP_UNPACK_RAW_1_P_flo->bits.exp; \
X##_s = _FP_UNPACK_RAW_1_P_flo->bits.sign; \
} \
while (0)
@ -122,24 +123,25 @@
#define _FP_PACK_RAW_1(fs, val, X) \
do \
{ \
union _FP_UNION_##fs _flo; \
union _FP_UNION_##fs _FP_PACK_RAW_1_flo; \
\
_flo.bits.frac = X##_f; \
_flo.bits.exp = X##_e; \
_flo.bits.sign = X##_s; \
_FP_PACK_RAW_1_flo.bits.frac = X##_f; \
_FP_PACK_RAW_1_flo.bits.exp = X##_e; \
_FP_PACK_RAW_1_flo.bits.sign = X##_s; \
\
(val) = _flo.flt; \
(val) = _FP_PACK_RAW_1_flo.flt; \
} \
while (0)
#define _FP_PACK_RAW_1_P(fs, val, X) \
do \
{ \
union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val); \
union _FP_UNION_##fs *_FP_PACK_RAW_1_P_flo \
= (union _FP_UNION_##fs *) (val); \
\
_flo->bits.frac = X##_f; \
_flo->bits.exp = X##_e; \
_flo->bits.sign = X##_s; \
_FP_PACK_RAW_1_P_flo->bits.frac = X##_f; \
_FP_PACK_RAW_1_P_flo->bits.exp = X##_e; \
_FP_PACK_RAW_1_P_flo->bits.sign = X##_s; \
} \
while (0)
@ -181,13 +183,14 @@
#define _FP_MUL_MEAT_1_wide(wfracbits, R, X, Y, doit) \
do \
{ \
_FP_FRAC_DECL_2 (_Z); \
_FP_MUL_MEAT_DW_1_wide (wfracbits, _Z, X, Y, doit); \
_FP_FRAC_DECL_2 (_FP_MUL_MEAT_1_wide_Z); \
_FP_MUL_MEAT_DW_1_wide (wfracbits, _FP_MUL_MEAT_1_wide_Z, \
X, Y, doit); \
/* Normalize.  Since we know where the msb of the multiplicands \
were (bit B), we know that the msb of the product is \
at either 2B or 2B-1. */ \
_FP_FRAC_SRS_2 (_Z, wfracbits-1, 2*wfracbits); \
R##_f = _Z_f0; \
_FP_FRAC_SRS_2 (_FP_MUL_MEAT_1_wide_Z, wfracbits-1, 2*wfracbits); \
R##_f = _FP_MUL_MEAT_1_wide_Z_f0; \
} \
while (0)
@ -196,39 +199,48 @@
#define _FP_MUL_MEAT_DW_1_hard(wfracbits, R, X, Y) \
do \
{ \
_FP_W_TYPE _xh, _xl, _yh, _yl; \
_FP_FRAC_DECL_2 (_a); \
_FP_W_TYPE _FP_MUL_MEAT_DW_1_hard_xh, _FP_MUL_MEAT_DW_1_hard_xl; \
_FP_W_TYPE _FP_MUL_MEAT_DW_1_hard_yh, _FP_MUL_MEAT_DW_1_hard_yl; \
_FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_1_hard_a); \
\
/* split the words in half */ \
_xh = X##_f >> (_FP_W_TYPE_SIZE/2); \
_xl = X##_f & (((_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE/2)) - 1); \
_yh = Y##_f >> (_FP_W_TYPE_SIZE/2); \
_yl = Y##_f & (((_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE/2)) - 1); \
_FP_MUL_MEAT_DW_1_hard_xh = X##_f >> (_FP_W_TYPE_SIZE/2); \
_FP_MUL_MEAT_DW_1_hard_xl \
= X##_f & (((_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE/2)) - 1); \
_FP_MUL_MEAT_DW_1_hard_yh = Y##_f >> (_FP_W_TYPE_SIZE/2); \
_FP_MUL_MEAT_DW_1_hard_yl \
= Y##_f & (((_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE/2)) - 1); \
\
/* multiply the pieces */ \
R##_f0 = _xl * _yl; \
_a_f0 = _xh * _yl; \
_a_f1 = _xl * _yh; \
R##_f1 = _xh * _yh; \
R##_f0 = _FP_MUL_MEAT_DW_1_hard_xl * _FP_MUL_MEAT_DW_1_hard_yl; \
_FP_MUL_MEAT_DW_1_hard_a_f0 \
= _FP_MUL_MEAT_DW_1_hard_xh * _FP_MUL_MEAT_DW_1_hard_yl; \
_FP_MUL_MEAT_DW_1_hard_a_f1 \
= _FP_MUL_MEAT_DW_1_hard_xl * _FP_MUL_MEAT_DW_1_hard_yh; \
R##_f1 = _FP_MUL_MEAT_DW_1_hard_xh * _FP_MUL_MEAT_DW_1_hard_yh; \
\
/* reassemble into two full words */ \
if ((_a_f0 += _a_f1) < _a_f1) \
if ((_FP_MUL_MEAT_DW_1_hard_a_f0 += _FP_MUL_MEAT_DW_1_hard_a_f1) \
< _FP_MUL_MEAT_DW_1_hard_a_f1) \
R##_f1 += (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE/2); \
_a_f1 = _a_f0 >> (_FP_W_TYPE_SIZE/2); \
_a_f0 = _a_f0 << (_FP_W_TYPE_SIZE/2); \
_FP_FRAC_ADD_2 (R, R, _a); \
_FP_MUL_MEAT_DW_1_hard_a_f1 \
= _FP_MUL_MEAT_DW_1_hard_a_f0 >> (_FP_W_TYPE_SIZE/2); \
_FP_MUL_MEAT_DW_1_hard_a_f0 \
= _FP_MUL_MEAT_DW_1_hard_a_f0 << (_FP_W_TYPE_SIZE/2); \
_FP_FRAC_ADD_2 (R, R, _FP_MUL_MEAT_DW_1_hard_a); \
} \
while (0)
#define _FP_MUL_MEAT_1_hard(wfracbits, R, X, Y) \
do \
{ \
_FP_FRAC_DECL_2 (_z); \
_FP_MUL_MEAT_DW_1_hard (wfracbits, _z, X, Y); \
_FP_FRAC_DECL_2 (_FP_MUL_MEAT_1_hard_z); \
_FP_MUL_MEAT_DW_1_hard (wfracbits, _FP_MUL_MEAT_1_hard_z, X, Y); \
\
/* normalize */ \
_FP_FRAC_SRS_2 (_z, wfracbits - 1, 2*wfracbits); \
R##_f = _z_f0; \
_FP_FRAC_SRS_2 (_FP_MUL_MEAT_1_hard_z, \
wfracbits - 1, 2*wfracbits); \
R##_f = _FP_MUL_MEAT_1_hard_z_f0; \
} \
while (0)
@ -245,12 +257,12 @@
#define _FP_DIV_MEAT_1_imm(fs, R, X, Y, doit) \
do \
{ \
_FP_W_TYPE _q, _r; \
_FP_W_TYPE _FP_DIV_MEAT_1_imm_q, _FP_DIV_MEAT_1_imm_r; \
X##_f <<= (X##_f < Y##_f \
? R##_e--, _FP_WFRACBITS_##fs \
: _FP_WFRACBITS_##fs - 1); \
doit (_q, _r, X##_f, Y##_f); \
R##_f = _q | (_r != 0); \
doit (_FP_DIV_MEAT_1_imm_q, _FP_DIV_MEAT_1_imm_r, X##_f, Y##_f); \
R##_f = _FP_DIV_MEAT_1_imm_q | (_FP_DIV_MEAT_1_imm_r != 0); \
} \
while (0)
@ -262,46 +274,58 @@
#define _FP_DIV_MEAT_1_udiv_norm(fs, R, X, Y) \
do \
{ \
_FP_W_TYPE _nh, _nl, _q, _r, _y; \
_FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_nh; \
_FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_nl; \
_FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_q; \
_FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_r; \
_FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_y; \
\
/* Normalize Y -- i.e. make the most significant bit set. */ \
_y = Y##_f << _FP_WFRACXBITS_##fs; \
_FP_DIV_MEAT_1_udiv_norm_y = Y##_f << _FP_WFRACXBITS_##fs; \
\
/* Shift X op correspondingly high, that is, up one full word. */ \
if (X##_f < Y##_f) \
{ \
R##_e--; \
_nl = 0; \
_nh = X##_f; \
_FP_DIV_MEAT_1_udiv_norm_nl = 0; \
_FP_DIV_MEAT_1_udiv_norm_nh = X##_f; \
} \
else \
{ \
_nl = X##_f << (_FP_W_TYPE_SIZE - 1); \
_nh = X##_f >> 1; \
_FP_DIV_MEAT_1_udiv_norm_nl = X##_f << (_FP_W_TYPE_SIZE - 1); \
_FP_DIV_MEAT_1_udiv_norm_nh = X##_f >> 1; \
} \
\
udiv_qrnnd (_q, _r, _nh, _nl, _y); \
R##_f = _q | (_r != 0); \
udiv_qrnnd (_FP_DIV_MEAT_1_udiv_norm_q, \
_FP_DIV_MEAT_1_udiv_norm_r, \
_FP_DIV_MEAT_1_udiv_norm_nh, \
_FP_DIV_MEAT_1_udiv_norm_nl, \
_FP_DIV_MEAT_1_udiv_norm_y); \
R##_f = (_FP_DIV_MEAT_1_udiv_norm_q \
| (_FP_DIV_MEAT_1_udiv_norm_r != 0)); \
} \
while (0)
#define _FP_DIV_MEAT_1_udiv(fs, R, X, Y) \
do \
{ \
_FP_W_TYPE _nh, _nl, _q, _r; \
_FP_W_TYPE _FP_DIV_MEAT_1_udiv_nh, _FP_DIV_MEAT_1_udiv_nl; \
_FP_W_TYPE _FP_DIV_MEAT_1_udiv_q, _FP_DIV_MEAT_1_udiv_r; \
if (X##_f < Y##_f) \
{ \
R##_e--; \
_nl = X##_f << _FP_WFRACBITS_##fs; \
_nh = X##_f >> _FP_WFRACXBITS_##fs; \
_FP_DIV_MEAT_1_udiv_nl = X##_f << _FP_WFRACBITS_##fs; \
_FP_DIV_MEAT_1_udiv_nh = X##_f >> _FP_WFRACXBITS_##fs; \
} \
else \
{ \
_nl = X##_f << (_FP_WFRACBITS_##fs - 1); \
_nh = X##_f >> (_FP_WFRACXBITS_##fs + 1); \
_FP_DIV_MEAT_1_udiv_nl = X##_f << (_FP_WFRACBITS_##fs - 1); \
_FP_DIV_MEAT_1_udiv_nh = X##_f >> (_FP_WFRACXBITS_##fs + 1); \
} \
udiv_qrnnd (_q, _r, _nh, _nl, Y##_f); \
R##_f = _q | (_r != 0); \
udiv_qrnnd (_FP_DIV_MEAT_1_udiv_q, _FP_DIV_MEAT_1_udiv_r, \
_FP_DIV_MEAT_1_udiv_nh, _FP_DIV_MEAT_1_udiv_nl, \
Y##_f); \
R##_f = _FP_DIV_MEAT_1_udiv_q | (_FP_DIV_MEAT_1_udiv_r != 0); \
} \
while (0)

View File

@ -185,8 +185,8 @@
# define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
do \
{ \
UWtype _t = xl; \
xh -= yh + ((xl -= yl) > _t); \
UWtype __FP_FRAC_DEC_2_t = xl; \
xh -= yh + ((xl -= yl) > __FP_FRAC_DEC_2_t); \
} \
while (0)
# endif
@ -213,25 +213,26 @@
#define _FP_UNPACK_RAW_2(fs, X, val) \
do \
{ \
union _FP_UNION_##fs _flo; \
_flo.flt = (val); \
union _FP_UNION_##fs _FP_UNPACK_RAW_2_flo; \
_FP_UNPACK_RAW_2_flo.flt = (val); \
\
X##_f0 = _flo.bits.frac0; \
X##_f1 = _flo.bits.frac1; \
X##_e = _flo.bits.exp; \
X##_s = _flo.bits.sign; \
X##_f0 = _FP_UNPACK_RAW_2_flo.bits.frac0; \
X##_f1 = _FP_UNPACK_RAW_2_flo.bits.frac1; \
X##_e = _FP_UNPACK_RAW_2_flo.bits.exp; \
X##_s = _FP_UNPACK_RAW_2_flo.bits.sign; \
} \
while (0)
#define _FP_UNPACK_RAW_2_P(fs, X, val) \
do \
{ \
union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val); \
union _FP_UNION_##fs *_FP_UNPACK_RAW_2_P_flo \
= (union _FP_UNION_##fs *) (val); \
\
X##_f0 = _flo->bits.frac0; \
X##_f1 = _flo->bits.frac1; \
X##_e = _flo->bits.exp; \
X##_s = _flo->bits.sign; \
X##_f0 = _FP_UNPACK_RAW_2_P_flo->bits.frac0; \
X##_f1 = _FP_UNPACK_RAW_2_P_flo->bits.frac1; \
X##_e = _FP_UNPACK_RAW_2_P_flo->bits.exp; \
X##_s = _FP_UNPACK_RAW_2_P_flo->bits.sign; \
} \
while (0)
@ -243,26 +244,27 @@
#define _FP_PACK_RAW_2(fs, val, X) \
do \
{ \
union _FP_UNION_##fs _flo; \
union _FP_UNION_##fs _FP_PACK_RAW_2_flo; \
\
_flo.bits.frac0 = X##_f0; \
_flo.bits.frac1 = X##_f1; \
_flo.bits.exp = X##_e; \
_flo.bits.sign = X##_s; \
_FP_PACK_RAW_2_flo.bits.frac0 = X##_f0; \
_FP_PACK_RAW_2_flo.bits.frac1 = X##_f1; \
_FP_PACK_RAW_2_flo.bits.exp = X##_e; \
_FP_PACK_RAW_2_flo.bits.sign = X##_s; \
\
(val) = _flo.flt; \
(val) = _FP_PACK_RAW_2_flo.flt; \
} \
while (0)
#define _FP_PACK_RAW_2_P(fs, val, X) \
do \
{ \
union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val); \
union _FP_UNION_##fs *_FP_PACK_RAW_2_P_flo \
= (union _FP_UNION_##fs *) (val); \
\
_flo->bits.frac0 = X##_f0; \
_flo->bits.frac1 = X##_f1; \
_flo->bits.exp = X##_e; \
_flo->bits.sign = X##_s; \
_FP_PACK_RAW_2_P_flo->bits.frac0 = X##_f0; \
_FP_PACK_RAW_2_P_flo->bits.frac1 = X##_f1; \
_FP_PACK_RAW_2_P_flo->bits.exp = X##_e; \
_FP_PACK_RAW_2_P_flo->bits.sign = X##_s; \
} \
while (0)
@ -276,20 +278,28 @@
#define _FP_MUL_MEAT_DW_2_wide(wfracbits, R, X, Y, doit) \
do \
{ \
_FP_FRAC_DECL_2 (_b); \
_FP_FRAC_DECL_2 (_c); \
_FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_b); \
_FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_c); \
\
doit (_FP_FRAC_WORD_4 (R, 1), _FP_FRAC_WORD_4 (R, 0), X##_f0, Y##_f0); \
doit (_b_f1, _b_f0, X##_f0, Y##_f1); \
doit (_c_f1, _c_f0, X##_f1, Y##_f0); \
doit (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), X##_f1, Y##_f1); \
doit (_FP_FRAC_WORD_4 (R, 1), _FP_FRAC_WORD_4 (R, 0), \
X##_f0, Y##_f0); \
doit (_FP_MUL_MEAT_DW_2_wide_b_f1, _FP_MUL_MEAT_DW_2_wide_b_f0, \
X##_f0, Y##_f1); \
doit (_FP_MUL_MEAT_DW_2_wide_c_f1, _FP_MUL_MEAT_DW_2_wide_c_f0, \
X##_f1, Y##_f0); \
doit (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
X##_f1, Y##_f1); \
\
__FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
_FP_FRAC_WORD_4 (R, 1), 0, _b_f1, _b_f0, \
_FP_FRAC_WORD_4 (R, 1), 0, \
_FP_MUL_MEAT_DW_2_wide_b_f1, \
_FP_MUL_MEAT_DW_2_wide_b_f0, \
_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
_FP_FRAC_WORD_4 (R, 1)); \
__FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
_FP_FRAC_WORD_4 (R, 1), 0, _c_f1, _c_f0, \
_FP_FRAC_WORD_4 (R, 1), 0, \
_FP_MUL_MEAT_DW_2_wide_c_f1, \
_FP_MUL_MEAT_DW_2_wide_c_f0, \
_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
_FP_FRAC_WORD_4 (R, 1)); \
} \
@ -298,16 +308,17 @@
#define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \
do \
{ \
_FP_FRAC_DECL_4 (_z); \
_FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_z); \
\
_FP_MUL_MEAT_DW_2_wide (wfracbits, _z, X, Y, doit); \
_FP_MUL_MEAT_DW_2_wide (wfracbits, _FP_MUL_MEAT_2_wide_z, \
X, Y, doit); \
\
/* Normalize.  Since we know where the msb of the multiplicands \
were (bit B), we know that the msb of the product is \
at either 2B or 2B-1. */ \
_FP_FRAC_SRS_4 (_z, wfracbits-1, 2*wfracbits); \
R##_f0 = _FP_FRAC_WORD_4 (_z, 0); \
R##_f1 = _FP_FRAC_WORD_4 (_z, 1); \
_FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_z, wfracbits-1, 2*wfracbits); \
R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 0); \
R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 1); \
} \
while (0)
@ -318,35 +329,51 @@
#define _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, R, X, Y, doit) \
do \
{ \
_FP_FRAC_DECL_2 (_b); \
_FP_FRAC_DECL_2 (_c); \
_FP_W_TYPE _d; \
int _c1, _c2; \
_FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_b); \
_FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_c); \
_FP_W_TYPE _FP_MUL_MEAT_DW_2_wide_3mul_d; \
int _FP_MUL_MEAT_DW_2_wide_3mul_c1; \
int _FP_MUL_MEAT_DW_2_wide_3mul_c2; \
\
_b_f0 = X##_f0 + X##_f1; \
_c1 = _b_f0 < X##_f0; \
_b_f1 = Y##_f0 + Y##_f1; \
_c2 = _b_f1 < Y##_f0; \
doit (_d, _FP_FRAC_WORD_4 (R, 0), X##_f0, Y##_f0); \
doit (_FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1), _b_f0, _b_f1); \
doit (_c_f1, _c_f0, X##_f1, Y##_f1); \
_FP_MUL_MEAT_DW_2_wide_3mul_b_f0 = X##_f0 + X##_f1; \
_FP_MUL_MEAT_DW_2_wide_3mul_c1 \
= _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 < X##_f0; \
_FP_MUL_MEAT_DW_2_wide_3mul_b_f1 = Y##_f0 + Y##_f1; \
_FP_MUL_MEAT_DW_2_wide_3mul_c2 \
= _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 < Y##_f0; \
doit (_FP_MUL_MEAT_DW_2_wide_3mul_d, _FP_FRAC_WORD_4 (R, 0), \
X##_f0, Y##_f0); \
doit (_FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1), \
_FP_MUL_MEAT_DW_2_wide_3mul_b_f0, \
_FP_MUL_MEAT_DW_2_wide_3mul_b_f1); \
doit (_FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
_FP_MUL_MEAT_DW_2_wide_3mul_c_f0, X##_f1, Y##_f1); \
\
_b_f0 &= -_c2; \
_b_f1 &= -_c1; \
_FP_MUL_MEAT_DW_2_wide_3mul_b_f0 \
&= -_FP_MUL_MEAT_DW_2_wide_3mul_c2; \
_FP_MUL_MEAT_DW_2_wide_3mul_b_f1 \
&= -_FP_MUL_MEAT_DW_2_wide_3mul_c1; \
__FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
_FP_FRAC_WORD_4 (R, 1), (_c1 & _c2), 0, _d, \
_FP_FRAC_WORD_4 (R, 1), \
(_FP_MUL_MEAT_DW_2_wide_3mul_c1 \
& _FP_MUL_MEAT_DW_2_wide_3mul_c2), 0, \
_FP_MUL_MEAT_DW_2_wide_3mul_d, \
0, _FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1)); \
__FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
_b_f0); \
_FP_MUL_MEAT_DW_2_wide_3mul_b_f0); \
__FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
_b_f1); \
_FP_MUL_MEAT_DW_2_wide_3mul_b_f1); \
__FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
_FP_FRAC_WORD_4 (R, 1), \
0, _d, _FP_FRAC_WORD_4 (R, 0)); \
0, _FP_MUL_MEAT_DW_2_wide_3mul_d, \
_FP_FRAC_WORD_4 (R, 0)); \
__FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
_FP_FRAC_WORD_4 (R, 1), 0, _c_f1, _c_f0); \
_FP_FRAC_WORD_4 (R, 1), 0, \
_FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
_FP_MUL_MEAT_DW_2_wide_3mul_c_f0); \
__FP_FRAC_ADD_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
_c_f1, _c_f0, \
_FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
_FP_MUL_MEAT_DW_2_wide_3mul_c_f0, \
_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2)); \
} \
while (0)
@ -354,45 +381,50 @@
#define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \
do \
{ \
_FP_FRAC_DECL_4 (_z); \
_FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_3mul_z); \
\
_FP_MUL_MEAT_DW_2_wide_3mul (wfracbits, _z, X, Y, doit); \
_FP_MUL_MEAT_DW_2_wide_3mul (wfracbits, \
_FP_MUL_MEAT_2_wide_3mul_z, \
X, Y, doit); \
\
/* Normalize.  Since we know where the msb of the multiplicands \
were (bit B), we know that the msb of the product is \
at either 2B or 2B-1. */ \
_FP_FRAC_SRS_4 (_z, wfracbits-1, 2*wfracbits); \
R##_f0 = _FP_FRAC_WORD_4 (_z, 0); \
R##_f1 = _FP_FRAC_WORD_4 (_z, 1); \
_FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_3mul_z, \
wfracbits-1, 2*wfracbits); \
R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 0); \
R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 1); \
} \
while (0)
#define _FP_MUL_MEAT_DW_2_gmp(wfracbits, R, X, Y) \
do \
{ \
_FP_W_TYPE _x[2], _y[2]; \
_x[0] = X##_f0; \
_x[1] = X##_f1; \
_y[0] = Y##_f0; \
_y[1] = Y##_f1; \
_FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_x[2]; \
_FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_y[2]; \
_FP_MUL_MEAT_DW_2_gmp_x[0] = X##_f0; \
_FP_MUL_MEAT_DW_2_gmp_x[1] = X##_f1; \
_FP_MUL_MEAT_DW_2_gmp_y[0] = Y##_f0; \
_FP_MUL_MEAT_DW_2_gmp_y[1] = Y##_f1; \
\
mpn_mul_n (R##_f, _x, _y, 2); \
mpn_mul_n (R##_f, _FP_MUL_MEAT_DW_2_gmp_x, \
_FP_MUL_MEAT_DW_2_gmp_y, 2); \
} \
while (0)
#define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \
do \
{ \
_FP_FRAC_DECL_4 (_z); \
_FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_gmp_z); \
\
_FP_MUL_MEAT_DW_2_gmp (wfracbits, _z, X, Y); \
_FP_MUL_MEAT_DW_2_gmp (wfracbits, _FP_MUL_MEAT_2_gmp_z, X, Y); \
\
/* Normalize.  Since we know where the msb of the multiplicands \
were (bit B), we know that the msb of the product is \
at either 2B or 2B-1. */ \
_FP_FRAC_SRS_4 (_z, wfracbits-1, 2*wfracbits); \
R##_f0 = _z_f[0]; \
R##_f1 = _z_f[1]; \
_FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_gmp_z, wfracbits-1, 2*wfracbits); \
R##_f0 = _FP_MUL_MEAT_2_gmp_z_f[0]; \
R##_f1 = _FP_MUL_MEAT_2_gmp_z_f[1]; \
} \
while (0)
@ -507,67 +539,91 @@
#define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \
do \
{ \
_FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0; \
_FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f2; \
_FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f1; \
_FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f0; \
_FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f1; \
_FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f0; \
_FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f1; \
_FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f0; \
if (_FP_FRAC_GE_2 (X, Y)) \
{ \
_n_f2 = X##_f1 >> 1; \
_n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \
_n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1); \
_FP_DIV_MEAT_2_udiv_n_f2 = X##_f1 >> 1; \
_FP_DIV_MEAT_2_udiv_n_f1 \
= X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \
_FP_DIV_MEAT_2_udiv_n_f0 \
= X##_f0 << (_FP_W_TYPE_SIZE - 1); \
} \
else \
{ \
R##_e--; \
_n_f2 = X##_f1; \
_n_f1 = X##_f0; \
_n_f0 = 0; \
_FP_DIV_MEAT_2_udiv_n_f2 = X##_f1; \
_FP_DIV_MEAT_2_udiv_n_f1 = X##_f0; \
_FP_DIV_MEAT_2_udiv_n_f0 = 0; \
} \
\
/* Normalize, i.e. make the most significant bit of the \
denominator set. */ \
_FP_FRAC_SLL_2 (Y, _FP_WFRACXBITS_##fs); \
\
udiv_qrnnd (R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1); \
umul_ppmm (_m_f1, _m_f0, R##_f1, Y##_f0); \
_r_f0 = _n_f0; \
if (_FP_FRAC_GT_2 (_m, _r)) \
udiv_qrnnd (R##_f1, _FP_DIV_MEAT_2_udiv_r_f1, \
_FP_DIV_MEAT_2_udiv_n_f2, _FP_DIV_MEAT_2_udiv_n_f1, \
Y##_f1); \
umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, _FP_DIV_MEAT_2_udiv_m_f0, \
R##_f1, Y##_f0); \
_FP_DIV_MEAT_2_udiv_r_f0 = _FP_DIV_MEAT_2_udiv_n_f0; \
if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, _FP_DIV_MEAT_2_udiv_r)) \
{ \
R##_f1--; \
_FP_FRAC_ADD_2 (_r, Y, _r); \
if (_FP_FRAC_GE_2 (_r, Y) && _FP_FRAC_GT_2 (_m, _r)) \
_FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
_FP_DIV_MEAT_2_udiv_r); \
if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y) \
&& _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
_FP_DIV_MEAT_2_udiv_r)) \
{ \
R##_f1--; \
_FP_FRAC_ADD_2 (_r, Y, _r); \
_FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
_FP_DIV_MEAT_2_udiv_r); \
} \
} \
_FP_FRAC_DEC_2 (_r, _m); \
_FP_FRAC_DEC_2 (_FP_DIV_MEAT_2_udiv_r, _FP_DIV_MEAT_2_udiv_m); \
\
if (_r_f1 == Y##_f1) \
if (_FP_DIV_MEAT_2_udiv_r_f1 == Y##_f1) \
{ \
/* This is a special case, not an optimization \
(_r/Y##_f1 would not fit into UWtype). \
As _r is guaranteed to be < Y, R##_f0 can be either \
(UWtype)-1 or (UWtype)-2. But as we know what kind \
of bits it is (sticky, guard, round), we don't care. \
We also don't care what the remainder is, because the \
guard bit will be set anyway. -jj */ \
(_FP_DIV_MEAT_2_udiv_r/Y##_f1 would not fit into UWtype). \
As _FP_DIV_MEAT_2_udiv_r is guaranteed to be < Y, \
R##_f0 can be either (UWtype)-1 or (UWtype)-2. But as we \
know what kind of bits it is (sticky, guard, round), \
we don't care. We also don't care what the remainder is, \
because the guard bit will be set anyway. -jj */ \
R##_f0 = -1; \
} \
else \
{ \
udiv_qrnnd (R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1); \
umul_ppmm (_m_f1, _m_f0, R##_f0, Y##_f0); \
_r_f0 = 0; \
if (_FP_FRAC_GT_2 (_m, _r)) \
udiv_qrnnd (R##_f0, _FP_DIV_MEAT_2_udiv_r_f1, \
_FP_DIV_MEAT_2_udiv_r_f1, \
_FP_DIV_MEAT_2_udiv_r_f0, Y##_f1); \
umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, \
_FP_DIV_MEAT_2_udiv_m_f0, R##_f0, Y##_f0); \
_FP_DIV_MEAT_2_udiv_r_f0 = 0; \
if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
_FP_DIV_MEAT_2_udiv_r)) \
{ \
R##_f0--; \
_FP_FRAC_ADD_2 (_r, Y, _r); \
if (_FP_FRAC_GE_2 (_r, Y) && _FP_FRAC_GT_2 (_m, _r)) \
_FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
_FP_DIV_MEAT_2_udiv_r); \
if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y) \
&& _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
_FP_DIV_MEAT_2_udiv_r)) \
{ \
R##_f0--; \
_FP_FRAC_ADD_2 (_r, Y, _r); \
_FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
_FP_DIV_MEAT_2_udiv_r); \
} \
} \
if (!_FP_FRAC_EQ_2 (_r, _m)) \
if (!_FP_FRAC_EQ_2 (_FP_DIV_MEAT_2_udiv_r, \
_FP_DIV_MEAT_2_udiv_m)) \
R##_f0 |= _FP_WORK_STICKY; \
} \
} \

View File

@ -42,22 +42,31 @@
#define _FP_FRAC_SLL_4(X, N) \
do \
{ \
_FP_I_TYPE _up, _down, _skip, _i; \
_skip = (N) / _FP_W_TYPE_SIZE; \
_up = (N) % _FP_W_TYPE_SIZE; \
_down = _FP_W_TYPE_SIZE - _up; \
if (!_up) \
for (_i = 3; _i >= _skip; --_i) \
X##_f[_i] = X##_f[_i-_skip]; \
_FP_I_TYPE _FP_FRAC_SLL_4_up, _FP_FRAC_SLL_4_down; \
_FP_I_TYPE _FP_FRAC_SLL_4_skip, _FP_FRAC_SLL_4_i; \
_FP_FRAC_SLL_4_skip = (N) / _FP_W_TYPE_SIZE; \
_FP_FRAC_SLL_4_up = (N) % _FP_W_TYPE_SIZE; \
_FP_FRAC_SLL_4_down = _FP_W_TYPE_SIZE - _FP_FRAC_SLL_4_up; \
if (!_FP_FRAC_SLL_4_up) \
for (_FP_FRAC_SLL_4_i = 3; \
_FP_FRAC_SLL_4_i >= _FP_FRAC_SLL_4_skip; \
--_FP_FRAC_SLL_4_i) \
X##_f[_FP_FRAC_SLL_4_i] \
= X##_f[_FP_FRAC_SLL_4_i-_FP_FRAC_SLL_4_skip]; \
else \
{ \
for (_i = 3; _i > _skip; --_i) \
X##_f[_i] = (X##_f[_i-_skip] << _up \
| X##_f[_i-_skip-1] >> _down); \
X##_f[_i--] = X##_f[0] << _up; \
for (_FP_FRAC_SLL_4_i = 3; \
_FP_FRAC_SLL_4_i > _FP_FRAC_SLL_4_skip; \
--_FP_FRAC_SLL_4_i) \
X##_f[_FP_FRAC_SLL_4_i] \
= ((X##_f[_FP_FRAC_SLL_4_i-_FP_FRAC_SLL_4_skip] \
<< _FP_FRAC_SLL_4_up) \
| (X##_f[_FP_FRAC_SLL_4_i-_FP_FRAC_SLL_4_skip-1] \
>> _FP_FRAC_SLL_4_down)); \
X##_f[_FP_FRAC_SLL_4_i--] = X##_f[0] << _FP_FRAC_SLL_4_up; \
} \
for (; _i >= 0; --_i) \
X##_f[_i] = 0; \
for (; _FP_FRAC_SLL_4_i >= 0; --_FP_FRAC_SLL_4_i) \
X##_f[_FP_FRAC_SLL_4_i] = 0; \
} \
while (0)
@ -65,22 +74,31 @@
#define _FP_FRAC_SRL_4(X, N) \
do \
{ \
_FP_I_TYPE _up, _down, _skip, _i; \
_skip = (N) / _FP_W_TYPE_SIZE; \
_down = (N) % _FP_W_TYPE_SIZE; \
_up = _FP_W_TYPE_SIZE - _down; \
if (!_down) \
for (_i = 0; _i <= 3-_skip; ++_i) \
X##_f[_i] = X##_f[_i+_skip]; \
_FP_I_TYPE _FP_FRAC_SRL_4_up, _FP_FRAC_SRL_4_down; \
_FP_I_TYPE _FP_FRAC_SRL_4_skip, _FP_FRAC_SRL_4_i; \
_FP_FRAC_SRL_4_skip = (N) / _FP_W_TYPE_SIZE; \
_FP_FRAC_SRL_4_down = (N) % _FP_W_TYPE_SIZE; \
_FP_FRAC_SRL_4_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRL_4_down; \
if (!_FP_FRAC_SRL_4_down) \
for (_FP_FRAC_SRL_4_i = 0; \
_FP_FRAC_SRL_4_i <= 3-_FP_FRAC_SRL_4_skip; \
++_FP_FRAC_SRL_4_i) \
X##_f[_FP_FRAC_SRL_4_i] \
= X##_f[_FP_FRAC_SRL_4_i+_FP_FRAC_SRL_4_skip]; \
else \
{ \
for (_i = 0; _i < 3-_skip; ++_i) \
X##_f[_i] = (X##_f[_i+_skip] >> _down \
| X##_f[_i+_skip+1] << _up); \
X##_f[_i++] = X##_f[3] >> _down; \
for (_FP_FRAC_SRL_4_i = 0; \
_FP_FRAC_SRL_4_i < 3-_FP_FRAC_SRL_4_skip; \
++_FP_FRAC_SRL_4_i) \
X##_f[_FP_FRAC_SRL_4_i] \
= ((X##_f[_FP_FRAC_SRL_4_i+_FP_FRAC_SRL_4_skip] \
>> _FP_FRAC_SRL_4_down) \
| (X##_f[_FP_FRAC_SRL_4_i+_FP_FRAC_SRL_4_skip+1] \
<< _FP_FRAC_SRL_4_up)); \
X##_f[_FP_FRAC_SRL_4_i++] = X##_f[3] >> _FP_FRAC_SRL_4_down; \
} \
for (; _i < 4; ++_i) \
X##_f[_i] = 0; \
for (; _FP_FRAC_SRL_4_i < 4; ++_FP_FRAC_SRL_4_i) \
X##_f[_FP_FRAC_SRL_4_i] = 0; \
} \
while (0)
@ -93,36 +111,49 @@
#define _FP_FRAC_SRST_4(X, S, N, size) \
do \
{ \
_FP_I_TYPE _up, _down, _skip, _i; \
_FP_W_TYPE _s; \
_skip = (N) / _FP_W_TYPE_SIZE; \
_down = (N) % _FP_W_TYPE_SIZE; \
_up = _FP_W_TYPE_SIZE - _down; \
for (_s = _i = 0; _i < _skip; ++_i) \
_s |= X##_f[_i]; \
if (!_down) \
for (_i = 0; _i <= 3-_skip; ++_i) \
X##_f[_i] = X##_f[_i+_skip]; \
_FP_I_TYPE _FP_FRAC_SRST_4_up, _FP_FRAC_SRST_4_down; \
_FP_I_TYPE _FP_FRAC_SRST_4_skip, _FP_FRAC_SRST_4_i; \
_FP_W_TYPE _FP_FRAC_SRST_4_s; \
_FP_FRAC_SRST_4_skip = (N) / _FP_W_TYPE_SIZE; \
_FP_FRAC_SRST_4_down = (N) % _FP_W_TYPE_SIZE; \
_FP_FRAC_SRST_4_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRST_4_down; \
for (_FP_FRAC_SRST_4_s = _FP_FRAC_SRST_4_i = 0; \
_FP_FRAC_SRST_4_i < _FP_FRAC_SRST_4_skip; \
++_FP_FRAC_SRST_4_i) \
_FP_FRAC_SRST_4_s |= X##_f[_FP_FRAC_SRST_4_i]; \
if (!_FP_FRAC_SRST_4_down) \
for (_FP_FRAC_SRST_4_i = 0; \
_FP_FRAC_SRST_4_i <= 3-_FP_FRAC_SRST_4_skip; \
++_FP_FRAC_SRST_4_i) \
X##_f[_FP_FRAC_SRST_4_i] \
= X##_f[_FP_FRAC_SRST_4_i+_FP_FRAC_SRST_4_skip]; \
else \
{ \
_s |= X##_f[_i] << _up; \
for (_i = 0; _i < 3-_skip; ++_i) \
X##_f[_i] = (X##_f[_i+_skip] >> _down \
| X##_f[_i+_skip+1] << _up); \
X##_f[_i++] = X##_f[3] >> _down; \
_FP_FRAC_SRST_4_s \
|= X##_f[_FP_FRAC_SRST_4_i] << _FP_FRAC_SRST_4_up; \
for (_FP_FRAC_SRST_4_i = 0; \
_FP_FRAC_SRST_4_i < 3-_FP_FRAC_SRST_4_skip; \
++_FP_FRAC_SRST_4_i) \
X##_f[_FP_FRAC_SRST_4_i] \
= ((X##_f[_FP_FRAC_SRST_4_i+_FP_FRAC_SRST_4_skip] \
>> _FP_FRAC_SRST_4_down) \
| (X##_f[_FP_FRAC_SRST_4_i+_FP_FRAC_SRST_4_skip+1] \
<< _FP_FRAC_SRST_4_up)); \
X##_f[_FP_FRAC_SRST_4_i++] \
= X##_f[3] >> _FP_FRAC_SRST_4_down; \
} \
for (; _i < 4; ++_i) \
X##_f[_i] = 0; \
S = (_s != 0); \
for (; _FP_FRAC_SRST_4_i < 4; ++_FP_FRAC_SRST_4_i) \
X##_f[_FP_FRAC_SRST_4_i] = 0; \
S = (_FP_FRAC_SRST_4_s != 0); \
} \
while (0)
#define _FP_FRAC_SRS_4(X, N, size) \
do \
{ \
int _sticky; \
_FP_FRAC_SRST_4 (X, _sticky, N, size); \
X##_f[0] |= _sticky; \
int _FP_FRAC_SRS_4_sticky; \
_FP_FRAC_SRST_4 (X, _FP_FRAC_SRS_4_sticky, N, size); \
X##_f[0] |= _FP_FRAC_SRS_4_sticky; \
} \
while (0)
@ -204,56 +235,58 @@
#define _FP_UNPACK_RAW_4(fs, X, val) \
do \
{ \
union _FP_UNION_##fs _flo; \
_flo.flt = (val); \
X##_f[0] = _flo.bits.frac0; \
X##_f[1] = _flo.bits.frac1; \
X##_f[2] = _flo.bits.frac2; \
X##_f[3] = _flo.bits.frac3; \
X##_e = _flo.bits.exp; \
X##_s = _flo.bits.sign; \
union _FP_UNION_##fs _FP_UNPACK_RAW_4_flo; \
_FP_UNPACK_RAW_4_flo.flt = (val); \
X##_f[0] = _FP_UNPACK_RAW_4_flo.bits.frac0; \
X##_f[1] = _FP_UNPACK_RAW_4_flo.bits.frac1; \
X##_f[2] = _FP_UNPACK_RAW_4_flo.bits.frac2; \
X##_f[3] = _FP_UNPACK_RAW_4_flo.bits.frac3; \
X##_e = _FP_UNPACK_RAW_4_flo.bits.exp; \
X##_s = _FP_UNPACK_RAW_4_flo.bits.sign; \
} \
while (0)
#define _FP_UNPACK_RAW_4_P(fs, X, val) \
do \
{ \
union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val); \
union _FP_UNION_##fs *_FP_UNPACK_RAW_4_P_flo \
= (union _FP_UNION_##fs *) (val); \
\
X##_f[0] = _flo->bits.frac0; \
X##_f[1] = _flo->bits.frac1; \
X##_f[2] = _flo->bits.frac2; \
X##_f[3] = _flo->bits.frac3; \
X##_e = _flo->bits.exp; \
X##_s = _flo->bits.sign; \
X##_f[0] = _FP_UNPACK_RAW_4_P_flo->bits.frac0; \
X##_f[1] = _FP_UNPACK_RAW_4_P_flo->bits.frac1; \
X##_f[2] = _FP_UNPACK_RAW_4_P_flo->bits.frac2; \
X##_f[3] = _FP_UNPACK_RAW_4_P_flo->bits.frac3; \
X##_e = _FP_UNPACK_RAW_4_P_flo->bits.exp; \
X##_s = _FP_UNPACK_RAW_4_P_flo->bits.sign; \
} \
while (0)
#define _FP_PACK_RAW_4(fs, val, X) \
do \
{ \
union _FP_UNION_##fs _flo; \
_flo.bits.frac0 = X##_f[0]; \
_flo.bits.frac1 = X##_f[1]; \
_flo.bits.frac2 = X##_f[2]; \
_flo.bits.frac3 = X##_f[3]; \
_flo.bits.exp = X##_e; \
_flo.bits.sign = X##_s; \
(val) = _flo.flt; \
union _FP_UNION_##fs _FP_PACK_RAW_4_flo; \
_FP_PACK_RAW_4_flo.bits.frac0 = X##_f[0]; \
_FP_PACK_RAW_4_flo.bits.frac1 = X##_f[1]; \
_FP_PACK_RAW_4_flo.bits.frac2 = X##_f[2]; \
_FP_PACK_RAW_4_flo.bits.frac3 = X##_f[3]; \
_FP_PACK_RAW_4_flo.bits.exp = X##_e; \
_FP_PACK_RAW_4_flo.bits.sign = X##_s; \
(val) = _FP_PACK_RAW_4_flo.flt; \
} \
while (0)
#define _FP_PACK_RAW_4_P(fs, val, X) \
do \
{ \
union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val); \
union _FP_UNION_##fs *_FP_PACK_RAW_4_P_flo \
= (union _FP_UNION_##fs *) (val); \
\
_flo->bits.frac0 = X##_f[0]; \
_flo->bits.frac1 = X##_f[1]; \
_flo->bits.frac2 = X##_f[2]; \
_flo->bits.frac3 = X##_f[3]; \
_flo->bits.exp = X##_e; \
_flo->bits.sign = X##_s; \
_FP_PACK_RAW_4_P_flo->bits.frac0 = X##_f[0]; \
_FP_PACK_RAW_4_P_flo->bits.frac1 = X##_f[1]; \
_FP_PACK_RAW_4_P_flo->bits.frac2 = X##_f[2]; \
_FP_PACK_RAW_4_P_flo->bits.frac3 = X##_f[3]; \
_FP_PACK_RAW_4_P_flo->bits.exp = X##_e; \
_FP_PACK_RAW_4_P_flo->bits.sign = X##_s; \
} \
while (0)
@ -266,81 +299,126 @@
/* Form the full eight-word product of the four-word fractions X and Y
   in R (accessed through _FP_FRAC_WORD_8 (R, 0..7)).

   DOIT is invoked as doit (high, low, a, b) for every pair of words
   X_f[i], Y_f[j]; NOTE(review): it is presumably a umul_ppmm-style
   widening multiply yielding the double-word product -- confirm against
   callers.  The partial products are accumulated column by column
   (i + j = 0, 1, ..., 6) with __FP_FRAC_ADD_3, and the last one with
   __FP_FRAC_ADD_2.  WFRACBITS is not referenced by this macro.

   All temporaries are prefixed with the macro name so that they cannot
   shadow identifiers used in the arguments.  */
#define _FP_MUL_MEAT_DW_4_wide(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_b); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_c); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_d); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_e); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_f); \
      /* Columns 0, 1 and 2.  */ \
      doit (_FP_FRAC_WORD_8 (R, 1), _FP_FRAC_WORD_8 (R, 0), \
            X##_f[0], Y##_f[0]); \
      doit (_FP_MUL_MEAT_DW_4_wide_b_f1, _FP_MUL_MEAT_DW_4_wide_b_f0, \
            X##_f[0], Y##_f[1]); \
      doit (_FP_MUL_MEAT_DW_4_wide_c_f1, _FP_MUL_MEAT_DW_4_wide_c_f0, \
            X##_f[1], Y##_f[0]); \
      doit (_FP_MUL_MEAT_DW_4_wide_d_f1, _FP_MUL_MEAT_DW_4_wide_d_f0, \
            X##_f[1], Y##_f[1]); \
      doit (_FP_MUL_MEAT_DW_4_wide_e_f1, _FP_MUL_MEAT_DW_4_wide_e_f0, \
            X##_f[0], Y##_f[2]); \
      doit (_FP_MUL_MEAT_DW_4_wide_f_f1, _FP_MUL_MEAT_DW_4_wide_f_f0, \
            X##_f[2], Y##_f[0]); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \
                       _FP_FRAC_WORD_8 (R, 1), 0, \
                       _FP_MUL_MEAT_DW_4_wide_b_f1, \
                       _FP_MUL_MEAT_DW_4_wide_b_f0, \
                       0, 0, _FP_FRAC_WORD_8 (R, 1)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \
                       _FP_FRAC_WORD_8 (R, 1), 0, \
                       _FP_MUL_MEAT_DW_4_wide_c_f1, \
                       _FP_MUL_MEAT_DW_4_wide_c_f0, \
                       _FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \
                       _FP_FRAC_WORD_8 (R, 1)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
                       _FP_FRAC_WORD_8 (R, 2), 0, \
                       _FP_MUL_MEAT_DW_4_wide_d_f1, \
                       _FP_MUL_MEAT_DW_4_wide_d_f0, \
                       0, _FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
                       _FP_FRAC_WORD_8 (R, 2), 0, \
                       _FP_MUL_MEAT_DW_4_wide_e_f1, \
                       _FP_MUL_MEAT_DW_4_wide_e_f0, \
                       _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
                       _FP_FRAC_WORD_8 (R, 2)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
                       _FP_FRAC_WORD_8 (R, 2), 0, \
                       _FP_MUL_MEAT_DW_4_wide_f_f1, \
                       _FP_MUL_MEAT_DW_4_wide_f_f0, \
                       _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
                       _FP_FRAC_WORD_8 (R, 2)); \
      /* Column 3.  */ \
      doit (_FP_MUL_MEAT_DW_4_wide_b_f1, \
            _FP_MUL_MEAT_DW_4_wide_b_f0, X##_f[0], Y##_f[3]); \
      doit (_FP_MUL_MEAT_DW_4_wide_c_f1, \
            _FP_MUL_MEAT_DW_4_wide_c_f0, X##_f[3], Y##_f[0]); \
      doit (_FP_MUL_MEAT_DW_4_wide_d_f1, _FP_MUL_MEAT_DW_4_wide_d_f0, \
            X##_f[1], Y##_f[2]); \
      doit (_FP_MUL_MEAT_DW_4_wide_e_f1, _FP_MUL_MEAT_DW_4_wide_e_f0, \
            X##_f[2], Y##_f[1]); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3), 0, \
                       _FP_MUL_MEAT_DW_4_wide_b_f1, \
                       _FP_MUL_MEAT_DW_4_wide_b_f0, \
                       0, _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3), 0, \
                       _FP_MUL_MEAT_DW_4_wide_c_f1, \
                       _FP_MUL_MEAT_DW_4_wide_c_f0, \
                       _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3), 0, \
                       _FP_MUL_MEAT_DW_4_wide_d_f1, \
                       _FP_MUL_MEAT_DW_4_wide_d_f0, \
                       _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3), 0, \
                       _FP_MUL_MEAT_DW_4_wide_e_f1, \
                       _FP_MUL_MEAT_DW_4_wide_e_f0, \
                       _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3)); \
      /* Columns 4 and 5.  */ \
      doit (_FP_MUL_MEAT_DW_4_wide_b_f1, _FP_MUL_MEAT_DW_4_wide_b_f0, \
            X##_f[2], Y##_f[2]); \
      doit (_FP_MUL_MEAT_DW_4_wide_c_f1, _FP_MUL_MEAT_DW_4_wide_c_f0, \
            X##_f[1], Y##_f[3]); \
      doit (_FP_MUL_MEAT_DW_4_wide_d_f1, _FP_MUL_MEAT_DW_4_wide_d_f0, \
            X##_f[3], Y##_f[1]); \
      doit (_FP_MUL_MEAT_DW_4_wide_e_f1, _FP_MUL_MEAT_DW_4_wide_e_f0, \
            X##_f[2], Y##_f[3]); \
      doit (_FP_MUL_MEAT_DW_4_wide_f_f1, _FP_MUL_MEAT_DW_4_wide_f_f0, \
            X##_f[3], Y##_f[2]); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
                       _FP_FRAC_WORD_8 (R, 4), 0, \
                       _FP_MUL_MEAT_DW_4_wide_b_f1, \
                       _FP_MUL_MEAT_DW_4_wide_b_f0, \
                       0, _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
                       _FP_FRAC_WORD_8 (R, 4), 0, \
                       _FP_MUL_MEAT_DW_4_wide_c_f1, \
                       _FP_MUL_MEAT_DW_4_wide_c_f0, \
                       _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
                       _FP_FRAC_WORD_8 (R, 4)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
                       _FP_FRAC_WORD_8 (R, 4), 0, \
                       _FP_MUL_MEAT_DW_4_wide_d_f1, \
                       _FP_MUL_MEAT_DW_4_wide_d_f0, \
                       _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
                       _FP_FRAC_WORD_8 (R, 4)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
                       _FP_FRAC_WORD_8 (R, 5), 0, \
                       _FP_MUL_MEAT_DW_4_wide_e_f1, \
                       _FP_MUL_MEAT_DW_4_wide_e_f0, \
                       0, _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
                       _FP_FRAC_WORD_8 (R, 5), 0, \
                       _FP_MUL_MEAT_DW_4_wide_f_f1, \
                       _FP_MUL_MEAT_DW_4_wide_f_f0, \
                       _FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
                       _FP_FRAC_WORD_8 (R, 5)); \
      /* Column 6.  */ \
      doit (_FP_MUL_MEAT_DW_4_wide_b_f1, _FP_MUL_MEAT_DW_4_wide_b_f0, \
            X##_f[3], Y##_f[3]); \
      __FP_FRAC_ADD_2 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
                       _FP_MUL_MEAT_DW_4_wide_b_f1, \
                       _FP_MUL_MEAT_DW_4_wide_b_f0, \
                       _FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6)); \
    } \
  while (0)
@ -348,16 +426,19 @@
/* Multiply the four-word fractions X and Y and store the normalized
   four-word result in R.  The eight-word product is formed by
   _FP_MUL_MEAT_DW_4_wide in a temporary, shifted right (sticky) by
   WFRACBITS - 1 with _FP_FRAC_SRS_8, and its low four words are copied
   to R with __FP_FRAC_SET_4.  DOIT is forwarded unchanged.

   The temporary is named after the macro so that it cannot shadow an
   identifier used for R, X or Y.  */
#define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_8 (_FP_MUL_MEAT_4_wide_z); \
      _FP_MUL_MEAT_DW_4_wide (wfracbits, _FP_MUL_MEAT_4_wide_z, \
                              X, Y, doit); \
      /* Normalize since we know where the msb of the multiplicands \
         were (bit B), we know that the msb of the product is \
         at either 2B or 2B-1.  */ \
      _FP_FRAC_SRS_8 (_FP_MUL_MEAT_4_wide_z, (wfracbits) - 1, \
                      2 * (wfracbits)); \
      __FP_FRAC_SET_4 (R, _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 3), \
                       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 2), \
                       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 1), \
                       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 0)); \
    } \
  while (0)
@ -371,16 +452,18 @@
/* Multiply the four-word fractions X and Y and store the normalized
   four-word result in R.  The eight-word product is formed by
   _FP_MUL_MEAT_DW_4_gmp in a temporary, shifted right (sticky) by
   WFRACBITS - 1 with _FP_FRAC_SRS_8, and its low four words are copied
   to R with __FP_FRAC_SET_4.

   The temporary is named after the macro so that it cannot shadow an
   identifier used for R, X or Y.  */
#define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y) \
  do \
    { \
      _FP_FRAC_DECL_8 (_FP_MUL_MEAT_4_gmp_z); \
      _FP_MUL_MEAT_DW_4_gmp (wfracbits, _FP_MUL_MEAT_4_gmp_z, X, Y); \
      /* Normalize since we know where the msb of the multiplicands \
         were (bit B), we know that the msb of the product is \
         at either 2B or 2B-1.  */ \
      _FP_FRAC_SRS_8 (_FP_MUL_MEAT_4_gmp_z, (wfracbits) - 1, \
                      2 * (wfracbits)); \
      __FP_FRAC_SET_4 (R, _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 3), \
                       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 2), \
                       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 1), \
                       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 0)); \
    } \
  while (0)
@ -391,12 +474,12 @@
/* Multiply the single word M by the three-word value (n2, n1, n0) and
   leave the result in (p3, p2, p1, p0), most significant word first.
   Three umul_ppmm calls produce the partial products; the low word of
   each higher partial product is folded into the word below with
   __FP_FRAC_ADDI_2.  NOTE(review): this relies on umul_ppmm (hi, lo, a, b)
   yielding the double-word product of a and b -- it is defined outside
   this block.

   The scratch word is named after the macro so that it cannot shadow an
   identifier used in the arguments.  */
#define umul_ppppmnnn(p3, p2, p1, p0, m, n2, n1, n0) \
  do \
    { \
      UWtype umul_ppppmnnn_t; \
      umul_ppmm (p1, p0, m, n0); \
      umul_ppmm (p2, umul_ppppmnnn_t, m, n1); \
      __FP_FRAC_ADDI_2 (p2, p1, umul_ppppmnnn_t); \
      umul_ppmm (p3, umul_ppppmnnn_t, m, n2); \
      __FP_FRAC_ADDI_2 (p3, p2, umul_ppppmnnn_t); \
    } \
  while (0)
@ -407,13 +490,14 @@
#define _FP_DIV_MEAT_4_udiv(fs, R, X, Y) \
do \
{ \
int _i; \
_FP_FRAC_DECL_4 (_n); \
_FP_FRAC_DECL_4 (_m); \
_FP_FRAC_SET_4 (_n, _FP_ZEROFRAC_4); \
int _FP_DIV_MEAT_4_udiv_i; \
_FP_FRAC_DECL_4 (_FP_DIV_MEAT_4_udiv_n); \
_FP_FRAC_DECL_4 (_FP_DIV_MEAT_4_udiv_m); \
_FP_FRAC_SET_4 (_FP_DIV_MEAT_4_udiv_n, _FP_ZEROFRAC_4); \
if (_FP_FRAC_GE_4 (X, Y)) \
{ \
_n_f[3] = X##_f[0] << (_FP_W_TYPE_SIZE - 1); \
_FP_DIV_MEAT_4_udiv_n_f[3] \
= X##_f[0] << (_FP_W_TYPE_SIZE - 1); \
_FP_FRAC_SRL_4 (X, 1); \
} \
else \
@ -423,49 +507,58 @@
denominator set. */ \
_FP_FRAC_SLL_4 (Y, _FP_WFRACXBITS_##fs); \
\
for (_i = 3; ; _i--) \
for (_FP_DIV_MEAT_4_udiv_i = 3; ; _FP_DIV_MEAT_4_udiv_i--) \
{ \
if (X##_f[3] == Y##_f[3]) \
{ \
/* This is a special case, not an optimization \
(X##_f[3]/Y##_f[3] would not fit into UWtype). \
As X## is guaranteed to be < Y, R##_f[_i] can be either \
As X## is guaranteed to be < Y, \
R##_f[_FP_DIV_MEAT_4_udiv_i] can be either \
(UWtype)-1 or (UWtype)-2. */ \
R##_f[_i] = -1; \
if (!_i) \
R##_f[_FP_DIV_MEAT_4_udiv_i] = -1; \
if (!_FP_DIV_MEAT_4_udiv_i) \
break; \
__FP_FRAC_SUB_4 (X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
Y##_f[2], Y##_f[1], Y##_f[0], 0, \
X##_f[2], X##_f[1], X##_f[0], _n_f[_i]); \
X##_f[2], X##_f[1], X##_f[0], \
_FP_DIV_MEAT_4_udiv_n_f[_FP_DIV_MEAT_4_udiv_i]); \
_FP_FRAC_SUB_4 (X, Y, X); \
if (X##_f[3] > Y##_f[3]) \
{ \
R##_f[_i] = -2; \
R##_f[_FP_DIV_MEAT_4_udiv_i] = -2; \
_FP_FRAC_ADD_4 (X, Y, X); \
} \
} \
else \
{ \
udiv_qrnnd (R##_f[_i], X##_f[3], X##_f[3], X##_f[2], Y##_f[3]); \
umul_ppppmnnn (_m_f[3], _m_f[2], _m_f[1], _m_f[0], \
R##_f[_i], Y##_f[2], Y##_f[1], Y##_f[0]); \
udiv_qrnnd (R##_f[_FP_DIV_MEAT_4_udiv_i], \
X##_f[3], X##_f[3], X##_f[2], Y##_f[3]); \
umul_ppppmnnn (_FP_DIV_MEAT_4_udiv_m_f[3], \
_FP_DIV_MEAT_4_udiv_m_f[2], \
_FP_DIV_MEAT_4_udiv_m_f[1], \
_FP_DIV_MEAT_4_udiv_m_f[0], \
R##_f[_FP_DIV_MEAT_4_udiv_i], \
Y##_f[2], Y##_f[1], Y##_f[0]); \
X##_f[2] = X##_f[1]; \
X##_f[1] = X##_f[0]; \
X##_f[0] = _n_f[_i]; \
if (_FP_FRAC_GT_4 (_m, X)) \
X##_f[0] \
= _FP_DIV_MEAT_4_udiv_n_f[_FP_DIV_MEAT_4_udiv_i]; \
if (_FP_FRAC_GT_4 (_FP_DIV_MEAT_4_udiv_m, X)) \
{ \
R##_f[_i]--; \
R##_f[_FP_DIV_MEAT_4_udiv_i]--; \
_FP_FRAC_ADD_4 (X, Y, X); \
if (_FP_FRAC_GE_4 (X, Y) && _FP_FRAC_GT_4 (_m, X)) \
if (_FP_FRAC_GE_4 (X, Y) \
&& _FP_FRAC_GT_4 (_FP_DIV_MEAT_4_udiv_m, X)) \
{ \
R##_f[_i]--; \
R##_f[_FP_DIV_MEAT_4_udiv_i]--; \
_FP_FRAC_ADD_4 (X, Y, X); \
} \
} \
_FP_FRAC_DEC_4 (X, _m); \
if (!_i) \
_FP_FRAC_DEC_4 (X, _FP_DIV_MEAT_4_udiv_m); \
if (!_FP_DIV_MEAT_4_udiv_i) \
{ \
if (!_FP_FRAC_EQ_4 (X, _m)) \
if (!_FP_FRAC_EQ_4 (X, _FP_DIV_MEAT_4_udiv_m)) \
R##_f[0] |= _FP_WORK_STICKY; \
break; \
} \
@ -591,18 +684,19 @@
/* Four-word addition: (r3, r2, r1, r0) = (x3, x2, x1, x0) + (y3, y2, y1, y0),
   most significant word first.  The carry out of each word is detected
   by comparing the wrapped sum with an operand and is added into the
   next word exactly once; the carry out of the top word is discarded.

   x0, x1 and x2 are read again after r0, r1 and r2 have been stored, so
   the result words must not be the same objects as the x words.  The
   carry locals are named after the macro so that they cannot shadow
   identifiers used in the arguments.  */
# define __FP_FRAC_ADD_4(r3, r2, r1, r0, x3, x2, x1, x0, y3, y2, y1, y0) \
  do \
    { \
      _FP_W_TYPE __FP_FRAC_ADD_4_c1, __FP_FRAC_ADD_4_c2; \
      _FP_W_TYPE __FP_FRAC_ADD_4_c3; \
      r0 = x0 + y0; \
      __FP_FRAC_ADD_4_c1 = r0 < x0; \
      r1 = x1 + y1; \
      __FP_FRAC_ADD_4_c2 = r1 < x1; \
      r1 += __FP_FRAC_ADD_4_c1; \
      __FP_FRAC_ADD_4_c2 |= r1 < __FP_FRAC_ADD_4_c1; \
      r2 = x2 + y2; \
      __FP_FRAC_ADD_4_c3 = r2 < x2; \
      r2 += __FP_FRAC_ADD_4_c2; \
      __FP_FRAC_ADD_4_c3 |= r2 < __FP_FRAC_ADD_4_c2; \
      r3 = x3 + y3 + __FP_FRAC_ADD_4_c3; \
    } \
  while (0)
#endif
@ -611,14 +705,14 @@
/* Three-word subtraction: (r2, r1, r0) = (x2, x1, x0) - (y2, y1, y0),
   most significant word first.  A borrow out of a word is detected by
   the wrapped difference exceeding the minuend word, or by an incoming
   borrow when the two operand words are equal; each borrow is
   subtracted from the next word exactly once, and the borrow out of the
   top word is discarded.

   x0, x1 and y1 are read again after r0 / r1 have been stored.  The
   borrow locals are named after the macro so that they cannot shadow
   identifiers used in the arguments.  */
# define __FP_FRAC_SUB_3(r2, r1, r0, x2, x1, x0, y2, y1, y0) \
  do \
    { \
      _FP_W_TYPE __FP_FRAC_SUB_3_c1, __FP_FRAC_SUB_3_c2; \
      r0 = x0 - y0; \
      __FP_FRAC_SUB_3_c1 = r0 > x0; \
      r1 = x1 - y1; \
      __FP_FRAC_SUB_3_c2 = r1 > x1; \
      r1 -= __FP_FRAC_SUB_3_c1; \
      __FP_FRAC_SUB_3_c2 |= __FP_FRAC_SUB_3_c1 && (y1 == x1); \
      r2 = x2 - y2 - __FP_FRAC_SUB_3_c2; \
    } \
  while (0)
#endif
@ -627,18 +721,19 @@
/* Four-word subtraction: (r3, r2, r1, r0) = (x3, x2, x1, x0) - (y3, y2, y1, y0),
   most significant word first.  A borrow out of a word is detected by
   the wrapped difference exceeding the minuend word, or by an incoming
   borrow when the two operand words are equal; each borrow is
   subtracted from the next word exactly once, and the borrow out of the
   top word is discarded.

   x0, x1, x2, y1 and y2 are read again after the corresponding result
   words have been stored.  The borrow locals are named after the macro
   so that they cannot shadow identifiers used in the arguments.  */
# define __FP_FRAC_SUB_4(r3, r2, r1, r0, x3, x2, x1, x0, y3, y2, y1, y0) \
  do \
    { \
      _FP_W_TYPE __FP_FRAC_SUB_4_c1, __FP_FRAC_SUB_4_c2; \
      _FP_W_TYPE __FP_FRAC_SUB_4_c3; \
      r0 = x0 - y0; \
      __FP_FRAC_SUB_4_c1 = r0 > x0; \
      r1 = x1 - y1; \
      __FP_FRAC_SUB_4_c2 = r1 > x1; \
      r1 -= __FP_FRAC_SUB_4_c1; \
      __FP_FRAC_SUB_4_c2 |= __FP_FRAC_SUB_4_c1 && (y1 == x1); \
      r2 = x2 - y2; \
      __FP_FRAC_SUB_4_c3 = r2 > x2; \
      r2 -= __FP_FRAC_SUB_4_c2; \
      __FP_FRAC_SUB_4_c3 |= __FP_FRAC_SUB_4_c2 && (y2 == x2); \
      r3 = x3 - y3 - __FP_FRAC_SUB_4_c3; \
    } \
  while (0)
#endif
@ -647,9 +742,14 @@
/* In-place three-word decrement: (x2, x1, x0) -= (y2, y1, y0).  The x
   words are first copied to temporaries, which are then passed as the
   minuend of a single __FP_FRAC_SUB_3 whose result is stored back into
   x2, x1, x0; this keeps the subtraction from reading an x word after
   it has been overwritten.

   The temporaries are named after the macro so that they cannot shadow
   identifiers used in the arguments.  */
# define __FP_FRAC_DEC_3(x2, x1, x0, y2, y1, y0) \
  do \
    { \
      UWtype __FP_FRAC_DEC_3_t0, __FP_FRAC_DEC_3_t1; \
      UWtype __FP_FRAC_DEC_3_t2; \
      __FP_FRAC_DEC_3_t0 = x0; \
      __FP_FRAC_DEC_3_t1 = x1; \
      __FP_FRAC_DEC_3_t2 = x2; \
      __FP_FRAC_SUB_3 (x2, x1, x0, __FP_FRAC_DEC_3_t2, \
                       __FP_FRAC_DEC_3_t1, __FP_FRAC_DEC_3_t0, \
                       y2, y1, y0); \
    } \
  while (0)
#endif
@ -658,9 +758,15 @@
/* In-place four-word decrement: (x3, x2, x1, x0) -= (y3, y2, y1, y0).
   The x words are first copied to temporaries, which are then passed as
   the minuend of a single __FP_FRAC_SUB_4 whose result is stored back
   into x3 .. x0; this keeps the subtraction from reading an x word after
   it has been overwritten.

   The temporaries are named after the macro so that they cannot shadow
   identifiers used in the arguments.  */
# define __FP_FRAC_DEC_4(x3, x2, x1, x0, y3, y2, y1, y0) \
  do \
    { \
      UWtype __FP_FRAC_DEC_4_t0, __FP_FRAC_DEC_4_t1; \
      UWtype __FP_FRAC_DEC_4_t2, __FP_FRAC_DEC_4_t3; \
      __FP_FRAC_DEC_4_t0 = x0; \
      __FP_FRAC_DEC_4_t1 = x1; \
      __FP_FRAC_DEC_4_t2 = x2; \
      __FP_FRAC_DEC_4_t3 = x3; \
      __FP_FRAC_SUB_4 (x3, x2, x1, x0, __FP_FRAC_DEC_4_t3, \
                       __FP_FRAC_DEC_4_t2, __FP_FRAC_DEC_4_t1, \
                       __FP_FRAC_DEC_4_t0, y3, y2, y1, y0); \
    } \
  while (0)
#endif
@ -669,13 +775,13 @@
/* In-place addition of the single word I to the four-word value
   (x3, x2, x1, x0), most significant word first.  I is added to x0 once;
   the carry out of each word (detected by the wrapped sum being smaller
   than what was added) is then rippled into the next word.  The carry
   out of x3 is discarded.

   I appears twice in the expansion, so it should not have side effects.
   The carry local is named after the macro so that it cannot shadow an
   identifier used in the arguments.  */
# define __FP_FRAC_ADDI_4(x3, x2, x1, x0, i) \
  do \
    { \
      UWtype __FP_FRAC_ADDI_4_t; \
      __FP_FRAC_ADDI_4_t = ((x0 += i) < i); \
      x1 += __FP_FRAC_ADDI_4_t; \
      __FP_FRAC_ADDI_4_t = (x1 < __FP_FRAC_ADDI_4_t); \
      x2 += __FP_FRAC_ADDI_4_t; \
      __FP_FRAC_ADDI_4_t = (x2 < __FP_FRAC_ADDI_4_t); \
      x3 += __FP_FRAC_ADDI_4_t; \
    } \
  while (0)
#endif

View File

@ -39,44 +39,62 @@
/* Shift the eight-word fraction X (words X_f[0] .. X_f[7], X_f[0] least
   significant) left by N bits, filling with zeros.  N is split into a
   whole-word count (N / _FP_W_TYPE_SIZE) and a residual bit count
   (N % _FP_W_TYPE_SIZE).  When the residual is zero words are simply
   moved; otherwise each destination word is assembled from two
   neighbouring source words.  The vacated low words are then cleared.

   All locals are named after the macro so that they cannot shadow
   identifiers used in X or N.  */
#define _FP_FRAC_SLL_8(X, N) \
  do \
    { \
      _FP_I_TYPE _FP_FRAC_SLL_8_up, _FP_FRAC_SLL_8_down; \
      _FP_I_TYPE _FP_FRAC_SLL_8_skip, _FP_FRAC_SLL_8_i; \
      _FP_FRAC_SLL_8_skip = (N) / _FP_W_TYPE_SIZE; \
      _FP_FRAC_SLL_8_up = (N) % _FP_W_TYPE_SIZE; \
      _FP_FRAC_SLL_8_down = _FP_W_TYPE_SIZE - _FP_FRAC_SLL_8_up; \
      if (!_FP_FRAC_SLL_8_up) \
        for (_FP_FRAC_SLL_8_i = 7; \
             _FP_FRAC_SLL_8_i >= _FP_FRAC_SLL_8_skip; \
             --_FP_FRAC_SLL_8_i) \
          X##_f[_FP_FRAC_SLL_8_i] \
            = X##_f[_FP_FRAC_SLL_8_i-_FP_FRAC_SLL_8_skip]; \
      else \
        { \
          for (_FP_FRAC_SLL_8_i = 7; \
               _FP_FRAC_SLL_8_i > _FP_FRAC_SLL_8_skip; \
               --_FP_FRAC_SLL_8_i) \
            X##_f[_FP_FRAC_SLL_8_i] \
              = ((X##_f[_FP_FRAC_SLL_8_i-_FP_FRAC_SLL_8_skip] \
                  << _FP_FRAC_SLL_8_up) \
                 | (X##_f[_FP_FRAC_SLL_8_i-_FP_FRAC_SLL_8_skip-1] \
                    >> _FP_FRAC_SLL_8_down)); \
          /* The index now equals the skip count: the lowest surviving \
             word has no lower neighbour to take bits from.  */ \
          X##_f[_FP_FRAC_SLL_8_i--] = X##_f[0] << _FP_FRAC_SLL_8_up; \
        } \
      for (; _FP_FRAC_SLL_8_i >= 0; --_FP_FRAC_SLL_8_i) \
        X##_f[_FP_FRAC_SLL_8_i] = 0; \
    } \
  while (0)
/* Shift the eight-word fraction X (words X_f[0] .. X_f[7], X_f[0] least
   significant) right by N bits, filling with zeros; bits shifted out are
   dropped.  N is split into a whole-word count (N / _FP_W_TYPE_SIZE) and
   a residual bit count (N % _FP_W_TYPE_SIZE).  When the residual is zero
   words are simply moved; otherwise each destination word is assembled
   from two neighbouring source words.  The vacated high words are then
   cleared.

   All locals are named after the macro so that they cannot shadow
   identifiers used in X or N.  */
#define _FP_FRAC_SRL_8(X, N) \
  do \
    { \
      _FP_I_TYPE _FP_FRAC_SRL_8_up, _FP_FRAC_SRL_8_down; \
      _FP_I_TYPE _FP_FRAC_SRL_8_skip, _FP_FRAC_SRL_8_i; \
      _FP_FRAC_SRL_8_skip = (N) / _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRL_8_down = (N) % _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRL_8_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRL_8_down; \
      if (!_FP_FRAC_SRL_8_down) \
        for (_FP_FRAC_SRL_8_i = 0; \
             _FP_FRAC_SRL_8_i <= 7-_FP_FRAC_SRL_8_skip; \
             ++_FP_FRAC_SRL_8_i) \
          X##_f[_FP_FRAC_SRL_8_i] \
            = X##_f[_FP_FRAC_SRL_8_i+_FP_FRAC_SRL_8_skip]; \
      else \
        { \
          for (_FP_FRAC_SRL_8_i = 0; \
               _FP_FRAC_SRL_8_i < 7-_FP_FRAC_SRL_8_skip; \
               ++_FP_FRAC_SRL_8_i) \
            X##_f[_FP_FRAC_SRL_8_i] \
              = ((X##_f[_FP_FRAC_SRL_8_i+_FP_FRAC_SRL_8_skip] \
                  >> _FP_FRAC_SRL_8_down) \
                 | (X##_f[_FP_FRAC_SRL_8_i+_FP_FRAC_SRL_8_skip+1] \
                    << _FP_FRAC_SRL_8_up)); \
          /* The index now equals 7 - skip: the highest surviving word \
             has no upper neighbour to take bits from.  */ \
          X##_f[_FP_FRAC_SRL_8_i++] = X##_f[7] >> _FP_FRAC_SRL_8_down; \
        } \
      for (; _FP_FRAC_SRL_8_i < 8; ++_FP_FRAC_SRL_8_i) \
        X##_f[_FP_FRAC_SRL_8_i] = 0; \
    } \
  while (0)
@ -89,28 +107,40 @@
/* Shift the eight-word fraction X (words X_f[0] .. X_f[7], X_f[0] least
   significant) right by N bits with a sticky bit: if any bit shifted out
   was nonzero, bit 0 of the result is set.  N is split into a whole-word
   count (N / _FP_W_TYPE_SIZE) and a residual bit count
   (N % _FP_W_TYPE_SIZE).  The words that are dropped entirely, plus the
   low residual bits of the first surviving word, are ORed together into
   the sticky accumulator before the shift is performed.  SIZE is not
   referenced by this macro.

   All locals are named after the macro so that they cannot shadow
   identifiers used in X or N.  */
#define _FP_FRAC_SRS_8(X, N, size) \
  do \
    { \
      _FP_I_TYPE _FP_FRAC_SRS_8_up, _FP_FRAC_SRS_8_down; \
      _FP_I_TYPE _FP_FRAC_SRS_8_skip, _FP_FRAC_SRS_8_i; \
      _FP_W_TYPE _FP_FRAC_SRS_8_s; \
      _FP_FRAC_SRS_8_skip = (N) / _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRS_8_down = (N) % _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRS_8_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRS_8_down; \
      for (_FP_FRAC_SRS_8_s = _FP_FRAC_SRS_8_i = 0; \
           _FP_FRAC_SRS_8_i < _FP_FRAC_SRS_8_skip; \
           ++_FP_FRAC_SRS_8_i) \
        _FP_FRAC_SRS_8_s |= X##_f[_FP_FRAC_SRS_8_i]; \
      if (!_FP_FRAC_SRS_8_down) \
        for (_FP_FRAC_SRS_8_i = 0; \
             _FP_FRAC_SRS_8_i <= 7-_FP_FRAC_SRS_8_skip; \
             ++_FP_FRAC_SRS_8_i) \
          X##_f[_FP_FRAC_SRS_8_i] \
            = X##_f[_FP_FRAC_SRS_8_i+_FP_FRAC_SRS_8_skip]; \
      else \
        { \
          /* The index still equals the skip count here, so this picks \
             up the bits about to be shifted out of the first surviving \
             word.  */ \
          _FP_FRAC_SRS_8_s \
            |= X##_f[_FP_FRAC_SRS_8_i] << _FP_FRAC_SRS_8_up; \
          for (_FP_FRAC_SRS_8_i = 0; \
               _FP_FRAC_SRS_8_i < 7-_FP_FRAC_SRS_8_skip; \
               ++_FP_FRAC_SRS_8_i) \
            X##_f[_FP_FRAC_SRS_8_i] \
              = ((X##_f[_FP_FRAC_SRS_8_i+_FP_FRAC_SRS_8_skip] \
                  >> _FP_FRAC_SRS_8_down) \
                 | (X##_f[_FP_FRAC_SRS_8_i+_FP_FRAC_SRS_8_skip+1] \
                    << _FP_FRAC_SRS_8_up)); \
          X##_f[_FP_FRAC_SRS_8_i++] = X##_f[7] >> _FP_FRAC_SRS_8_down; \
        } \
      for (; _FP_FRAC_SRS_8_i < 8; ++_FP_FRAC_SRS_8_i) \
        X##_f[_FP_FRAC_SRS_8_i] = 0; \
      /* Don't fix the LSB until the very end when we're sure f[0] is \
         stable.  */ \
      X##_f[0] |= (_FP_FRAC_SRS_8_s != 0); \
    } \
  while (0)