A64: Fix a few simulation inaccuracies.

- Return the correct NaN when an invalid operation generates a NaN.
  - When one or more operands are NaN, handle them as the processor
    would, prioritising signalling NaNs and making them quiet.
  - Fix fmadd and related instructions:
     - Fnmadd is fma(-n, m, -a), not -fma(n, m, a).
     - Some common libc implementations incorrectly implement fma for
       zero results, so work around these cases.
  - Replace some unreliable tests.

This patch also adds support for Default-NaN mode, since once all the
other work was done, it only required a couple of lines of code.
Default-NaN mode was used for an optimisation in ARM, and it should now
be possible to apply the same optimisation to A64.

BUG=
R=jochen@chromium.org

Review URL: https://codereview.chromium.org/199083005

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@19927 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
jacob.bramley@arm.com 2014-03-14 10:23:55 +00:00
parent 11df4b8815
commit cf43195d47
6 changed files with 1326 additions and 351 deletions

View File

@ -85,12 +85,18 @@ const int64_t kSRegMask = 0x00000000ffffffffL;
const int64_t kDRegMask = 0xffffffffffffffffL;
// TODO(all) check if the expression below works on all compilers or if it
// triggers an overflow error.
const int64_t kDSignMask = 0x1L << 63;
const int64_t kDSignBit = 63;
const int64_t kXSignMask = 0x1L << 63;
const int64_t kDSignMask = 0x1L << kDSignBit;
const int64_t kSSignBit = 31;
const int64_t kSSignMask = 0x1L << kSSignBit;
const int64_t kXSignBit = 63;
const int64_t kWSignMask = 0x1L << 31;
const int64_t kXSignMask = 0x1L << kXSignBit;
const int64_t kWSignBit = 31;
const int64_t kWSignMask = 0x1L << kWSignBit;
const int64_t kDQuietNanBit = 51;
const int64_t kDQuietNanMask = 0x1L << kDQuietNanBit;
const int64_t kSQuietNanBit = 22;
const int64_t kSQuietNanMask = 0x1L << kSQuietNanBit;
const int64_t kByteMask = 0xffL;
const int64_t kHalfWordMask = 0xffffL;
const int64_t kWordMask = 0xffffffffL;

View File

@ -67,6 +67,10 @@ DEFINE_FLOAT(kFP32SignallingNaN, 0x7f800001);
DEFINE_DOUBLE(kFP64QuietNaN, 0x7ff800007fc00001);
DEFINE_FLOAT(kFP32QuietNaN, 0x7fc00001);
// The default NaN values (for FPCR.DN=1).
DEFINE_DOUBLE(kFP64DefaultNaN, 0x7ff8000000000000UL);
DEFINE_FLOAT(kFP32DefaultNaN, 0x7fc00000);
#undef DEFINE_FLOAT
#undef DEFINE_DOUBLE

View File

@ -731,6 +731,16 @@ int64_t Simulator::ExtendValue(unsigned reg_size,
}
template<> double Simulator::FPDefaultNaN<double>() const {
return kFP64DefaultNaN;
}
template<> float Simulator::FPDefaultNaN<float>() const {
return kFP32DefaultNaN;
}
void Simulator::FPCompare(double val0, double val1) {
AssertSupportedFPCR();
@ -2100,8 +2110,8 @@ uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
void Simulator::VisitFPCompare(Instruction* instr) {
AssertSupportedFPCR();
unsigned reg_size = instr->FPType() == FP32 ? kSRegSizeInBits
: kDRegSizeInBits;
unsigned reg_size = (instr->Mask(FP64) == FP64) ? kDRegSizeInBits
: kSRegSizeInBits;
double fn_val = fpreg(reg_size, instr->Rn());
switch (instr->Mask(FPCompareMask)) {
@ -2123,8 +2133,8 @@ void Simulator::VisitFPConditionalCompare(Instruction* instr) {
if (ConditionPassed(static_cast<Condition>(instr->Condition()))) {
// If the condition passes, set the status flags to the result of
// comparing the operands.
unsigned reg_size = instr->FPType() == FP32 ? kSRegSizeInBits
: kDRegSizeInBits;
unsigned reg_size = (instr->Mask(FP64) == FP64) ? kDRegSizeInBits
: kSRegSizeInBits;
FPCompare(fpreg(reg_size, instr->Rn()), fpreg(reg_size, instr->Rm()));
} else {
// If the condition fails, set the status flags to the nzcv immediate.
@ -2168,8 +2178,8 @@ void Simulator::VisitFPDataProcessing1Source(Instruction* instr) {
case FABS_d: set_dreg(fd, std::fabs(dreg(fn))); break;
case FNEG_s: set_sreg(fd, -sreg(fn)); break;
case FNEG_d: set_dreg(fd, -dreg(fn)); break;
case FSQRT_s: set_sreg(fd, std::sqrt(sreg(fn))); break;
case FSQRT_d: set_dreg(fd, std::sqrt(dreg(fn))); break;
case FSQRT_s: set_sreg(fd, FPSqrt(sreg(fn))); break;
case FSQRT_d: set_dreg(fd, FPSqrt(dreg(fn))); break;
case FRINTA_s: set_sreg(fd, FPRoundInt(sreg(fn), FPTieAway)); break;
case FRINTA_d: set_dreg(fd, FPRoundInt(dreg(fn), FPTieAway)); break;
case FRINTN_s: set_sreg(fd, FPRoundInt(sreg(fn), FPTieEven)); break;
@ -2428,8 +2438,10 @@ float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
double Simulator::FPRoundInt(double value, FPRounding round_mode) {
if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
(value == kFP64NegativeInfinity) || std::isnan(value)) {
(value == kFP64NegativeInfinity)) {
return value;
} else if (std::isnan(value)) {
return FPProcessNaN(value);
}
double int_result = floor(value);
@ -2473,8 +2485,9 @@ double Simulator::FPRoundInt(double value, FPRounding round_mode) {
double Simulator::FPToDouble(float value) {
switch (std::fpclassify(value)) {
case FP_NAN: {
// Convert NaNs as the processor would, assuming that FPCR.DN (default
// NaN) is not set:
if (DN()) return kFP64DefaultNaN;
// Convert NaNs as the processor would:
// - The sign is propagated.
// - The payload (mantissa) is transferred entirely, except that the top
// bit is forced to '1', making the result a quiet NaN. The unused
@ -2513,8 +2526,9 @@ float Simulator::FPToFloat(double value, FPRounding round_mode) {
switch (std::fpclassify(value)) {
case FP_NAN: {
// Convert NaNs as the processor would, assuming that FPCR.DN (default
// NaN) is not set:
if (DN()) return kFP32DefaultNaN;
// Convert NaNs as the processor would:
// - The sign is propagated.
// - The payload (mantissa) is transferred as much as possible, except
// that the top bit is forced to '1', making the result a quiet NaN.
@ -2565,23 +2579,37 @@ void Simulator::VisitFPDataProcessing2Source(Instruction* instr) {
unsigned fn = instr->Rn();
unsigned fm = instr->Rm();
// Fmaxnm and Fminnm have special NaN handling.
switch (instr->Mask(FPDataProcessing2SourceMask)) {
case FADD_s: set_sreg(fd, sreg(fn) + sreg(fm)); break;
case FADD_d: set_dreg(fd, dreg(fn) + dreg(fm)); break;
case FSUB_s: set_sreg(fd, sreg(fn) - sreg(fm)); break;
case FSUB_d: set_dreg(fd, dreg(fn) - dreg(fm)); break;
case FMUL_s: set_sreg(fd, sreg(fn) * sreg(fm)); break;
case FMUL_d: set_dreg(fd, dreg(fn) * dreg(fm)); break;
case FDIV_s: set_sreg(fd, sreg(fn) / sreg(fm)); break;
case FDIV_d: set_dreg(fd, dreg(fn) / dreg(fm)); break;
case FMAXNM_s: set_sreg(fd, FPMaxNM(sreg(fn), sreg(fm))); return;
case FMAXNM_d: set_dreg(fd, FPMaxNM(dreg(fn), dreg(fm))); return;
case FMINNM_s: set_sreg(fd, FPMinNM(sreg(fn), sreg(fm))); return;
case FMINNM_d: set_dreg(fd, FPMinNM(dreg(fn), dreg(fm))); return;
default:
break; // Fall through.
}
if (FPProcessNaNs(instr)) return;
switch (instr->Mask(FPDataProcessing2SourceMask)) {
case FADD_s: set_sreg(fd, FPAdd(sreg(fn), sreg(fm))); break;
case FADD_d: set_dreg(fd, FPAdd(dreg(fn), dreg(fm))); break;
case FSUB_s: set_sreg(fd, FPSub(sreg(fn), sreg(fm))); break;
case FSUB_d: set_dreg(fd, FPSub(dreg(fn), dreg(fm))); break;
case FMUL_s: set_sreg(fd, FPMul(sreg(fn), sreg(fm))); break;
case FMUL_d: set_dreg(fd, FPMul(dreg(fn), dreg(fm))); break;
case FDIV_s: set_sreg(fd, FPDiv(sreg(fn), sreg(fm))); break;
case FDIV_d: set_dreg(fd, FPDiv(dreg(fn), dreg(fm))); break;
case FMAX_s: set_sreg(fd, FPMax(sreg(fn), sreg(fm))); break;
case FMAX_d: set_dreg(fd, FPMax(dreg(fn), dreg(fm))); break;
case FMIN_s: set_sreg(fd, FPMin(sreg(fn), sreg(fm))); break;
case FMIN_d: set_dreg(fd, FPMin(dreg(fn), dreg(fm))); break;
case FMAXNM_s: set_sreg(fd, FPMaxNM(sreg(fn), sreg(fm))); break;
case FMAXNM_d: set_dreg(fd, FPMaxNM(dreg(fn), dreg(fm))); break;
case FMINNM_s: set_sreg(fd, FPMinNM(sreg(fn), sreg(fm))); break;
case FMINNM_d: set_dreg(fd, FPMinNM(dreg(fn), dreg(fm))); break;
case FMAXNM_s:
case FMAXNM_d:
case FMINNM_s:
case FMINNM_d:
// These were handled before the standard FPProcessNaNs() stage.
UNREACHABLE();
default: UNIMPLEMENTED();
}
}
@ -2598,33 +2626,62 @@ void Simulator::VisitFPDataProcessing3Source(Instruction* instr) {
// The C99 (and C++11) fma function performs a fused multiply-accumulate.
switch (instr->Mask(FPDataProcessing3SourceMask)) {
// fd = fa +/- (fn * fm)
case FMADD_s: set_sreg(fd, fmaf(sreg(fn), sreg(fm), sreg(fa))); break;
case FMSUB_s: set_sreg(fd, fmaf(-sreg(fn), sreg(fm), sreg(fa))); break;
case FMADD_d: set_dreg(fd, fma(dreg(fn), dreg(fm), dreg(fa))); break;
case FMSUB_d: set_dreg(fd, fma(-dreg(fn), dreg(fm), dreg(fa))); break;
// Variants of the above where the result is negated.
case FNMADD_s: set_sreg(fd, -fmaf(sreg(fn), sreg(fm), sreg(fa))); break;
case FNMSUB_s: set_sreg(fd, -fmaf(-sreg(fn), sreg(fm), sreg(fa))); break;
case FNMADD_d: set_dreg(fd, -fma(dreg(fn), dreg(fm), dreg(fa))); break;
case FNMSUB_d: set_dreg(fd, -fma(-dreg(fn), dreg(fm), dreg(fa))); break;
case FMADD_s: set_sreg(fd, FPMulAdd(sreg(fa), sreg(fn), sreg(fm))); break;
case FMSUB_s: set_sreg(fd, FPMulAdd(sreg(fa), -sreg(fn), sreg(fm))); break;
case FMADD_d: set_dreg(fd, FPMulAdd(dreg(fa), dreg(fn), dreg(fm))); break;
case FMSUB_d: set_dreg(fd, FPMulAdd(dreg(fa), -dreg(fn), dreg(fm))); break;
// Negated variants of the above.
case FNMADD_s:
set_sreg(fd, FPMulAdd(-sreg(fa), -sreg(fn), sreg(fm)));
break;
case FNMSUB_s:
set_sreg(fd, FPMulAdd(-sreg(fa), sreg(fn), sreg(fm)));
break;
case FNMADD_d:
set_dreg(fd, FPMulAdd(-dreg(fa), -dreg(fn), dreg(fm)));
break;
case FNMSUB_d:
set_dreg(fd, FPMulAdd(-dreg(fa), dreg(fn), dreg(fm)));
break;
default: UNIMPLEMENTED();
}
}
template <typename T>
T Simulator::FPMax(T a, T b) {
if (IsSignallingNaN(a)) {
return a;
} else if (IsSignallingNaN(b)) {
return b;
} else if (std::isnan(a)) {
ASSERT(IsQuietNaN(a));
return a;
} else if (std::isnan(b)) {
ASSERT(IsQuietNaN(b));
return b;
T Simulator::FPAdd(T op1, T op2) {
// NaNs should be handled elsewhere.
ASSERT(!std::isnan(op1) && !std::isnan(op2));
if (isinf(op1) && isinf(op2) && (op1 != op2)) {
// inf + -inf returns the default NaN.
return FPDefaultNaN<T>();
} else {
// Other cases should be handled by standard arithmetic.
return op1 + op2;
}
}
template <typename T>
T Simulator::FPDiv(T op1, T op2) {
// NaNs should be handled elsewhere.
ASSERT(!std::isnan(op1) && !std::isnan(op2));
if ((isinf(op1) && isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
// inf / inf and 0.0 / 0.0 return the default NaN.
return FPDefaultNaN<T>();
} else {
// Other cases should be handled by standard arithmetic.
return op1 / op2;
}
}
template <typename T>
T Simulator::FPMax(T a, T b) {
// NaNs should be handled elsewhere.
ASSERT(!std::isnan(a) && !std::isnan(b));
if ((a == 0.0) && (b == 0.0) &&
(copysign(1.0, a) != copysign(1.0, b))) {
@ -2643,22 +2700,15 @@ T Simulator::FPMaxNM(T a, T b) {
} else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
b = kFP64NegativeInfinity;
}
return FPMax(a, b);
T result = FPProcessNaNs(a, b);
return std::isnan(result) ? result : FPMax(a, b);
}
template <typename T>
T Simulator::FPMin(T a, T b) {
if (IsSignallingNaN(a)) {
return a;
} else if (IsSignallingNaN(b)) {
return b;
} else if (std::isnan(a)) {
ASSERT(IsQuietNaN(a));
return a;
} else if (std::isnan(b)) {
ASSERT(IsQuietNaN(b));
return b;
}
// NaNs should be handled elsewhere.
ASSERT(!isnan(a) && !isnan(b));
if ((a == 0.0) && (b == 0.0) &&
(copysign(1.0, a) != copysign(1.0, b))) {
@ -2677,7 +2727,168 @@ T Simulator::FPMinNM(T a, T b) {
} else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
b = kFP64PositiveInfinity;
}
return FPMin(a, b);
T result = FPProcessNaNs(a, b);
return isnan(result) ? result : FPMin(a, b);
}
template <typename T>
T Simulator::FPMul(T op1, T op2) {
// NaNs should be handled elsewhere.
ASSERT(!std::isnan(op1) && !std::isnan(op2));
if ((isinf(op1) && (op2 == 0.0)) || (isinf(op2) && (op1 == 0.0))) {
// inf * 0.0 returns the default NaN.
return FPDefaultNaN<T>();
} else {
// Other cases should be handled by standard arithmetic.
return op1 * op2;
}
}
template<typename T>
T Simulator::FPMulAdd(T a, T op1, T op2) {
T result = FPProcessNaNs3(a, op1, op2);
T sign_a = copysign(1.0, a);
T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
bool isinf_prod = std::isinf(op1) || std::isinf(op2);
bool operation_generates_nan =
(std::isinf(op1) && (op2 == 0.0)) || // inf * 0.0
(std::isinf(op2) && (op1 == 0.0)) || // 0.0 * inf
(std::isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf
if (std::isnan(result)) {
// Generated NaNs override quiet NaNs propagated from a.
if (operation_generates_nan && IsQuietNaN(a)) {
return FPDefaultNaN<T>();
} else {
return result;
}
}
// If the operation would produce a NaN, return the default NaN.
if (operation_generates_nan) {
return FPDefaultNaN<T>();
}
// Work around broken fma implementations for exact zero results: The sign of
// exact 0.0 results is positive unless both a and op1 * op2 are negative.
if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;
}
result = FusedMultiplyAdd(op1, op2, a);
ASSERT(!std::isnan(result));
// Work around broken fma implementations for rounded zero results: If a is
// 0.0, the sign of the result is the sign of op1 * op2 before rounding.
if ((a == 0.0) && (result == 0.0)) {
return copysign(0.0, sign_prod);
}
return result;
}
template <typename T>
T Simulator::FPSqrt(T op) {
if (std::isnan(op)) {
return FPProcessNaN(op);
} else if (op < 0.0) {
return FPDefaultNaN<T>();
} else {
return std::sqrt(op);
}
}
template <typename T>
T Simulator::FPSub(T op1, T op2) {
// NaNs should be handled elsewhere.
ASSERT(!std::isnan(op1) && !std::isnan(op2));
if (isinf(op1) && isinf(op2) && (op1 == op2)) {
// inf - inf returns the default NaN.
return FPDefaultNaN<T>();
} else {
// Other cases should be handled by standard arithmetic.
return op1 - op2;
}
}
template <typename T>
T Simulator::FPProcessNaN(T op) {
ASSERT(std::isnan(op));
return DN() ? FPDefaultNaN<T>() : ToQuietNaN(op);
}
template <typename T>
T Simulator::FPProcessNaNs(T op1, T op2) {
if (IsSignallingNaN(op1)) {
return FPProcessNaN(op1);
} else if (IsSignallingNaN(op2)) {
return FPProcessNaN(op2);
} else if (std::isnan(op1)) {
ASSERT(IsQuietNaN(op1));
return FPProcessNaN(op1);
} else if (std::isnan(op2)) {
ASSERT(IsQuietNaN(op2));
return FPProcessNaN(op2);
} else {
return 0.0;
}
}
template <typename T>
T Simulator::FPProcessNaNs3(T op1, T op2, T op3) {
if (IsSignallingNaN(op1)) {
return FPProcessNaN(op1);
} else if (IsSignallingNaN(op2)) {
return FPProcessNaN(op2);
} else if (IsSignallingNaN(op3)) {
return FPProcessNaN(op3);
} else if (std::isnan(op1)) {
ASSERT(IsQuietNaN(op1));
return FPProcessNaN(op1);
} else if (std::isnan(op2)) {
ASSERT(IsQuietNaN(op2));
return FPProcessNaN(op2);
} else if (std::isnan(op3)) {
ASSERT(IsQuietNaN(op3));
return FPProcessNaN(op3);
} else {
return 0.0;
}
}
bool Simulator::FPProcessNaNs(Instruction* instr) {
unsigned fd = instr->Rd();
unsigned fn = instr->Rn();
unsigned fm = instr->Rm();
bool done = false;
if (instr->Mask(FP64) == FP64) {
double result = FPProcessNaNs(dreg(fn), dreg(fm));
if (std::isnan(result)) {
set_dreg(fd, result);
done = true;
}
} else {
float result = FPProcessNaNs(sreg(fn), sreg(fm));
if (std::isnan(result)) {
set_sreg(fd, result);
done = true;
}
}
return done;
}

View File

@ -537,8 +537,9 @@ class Simulator : public DecoderVisitor {
SimSystemRegister& nzcv() { return nzcv_; }
// TODO(jbramley): Find a way to make the fpcr_ members return the proper
// types, so this accessor is not necessary.
// types, so these accessors are not necessary.
FPRounding RMode() { return static_cast<FPRounding>(fpcr_.RMode()); }
bool DN() { return fpcr_.DN() != 0; }
SimSystemRegister& fpcr() { return fpcr_; }
// Debug helpers
@ -707,6 +708,9 @@ class Simulator : public DecoderVisitor {
uint64_t ReverseBits(uint64_t value, unsigned num_bits);
uint64_t ReverseBytes(uint64_t value, ReverseByteMode mode);
template <typename T>
T FPDefaultNaN() const;
void FPCompare(double val0, double val1);
double FPRoundInt(double value, FPRounding round_mode);
double FPToDouble(float value);
@ -721,17 +725,47 @@ class Simulator : public DecoderVisitor {
uint64_t FPToUInt64(double value, FPRounding rmode);
template <typename T>
T FPMax(T a, T b);
T FPAdd(T op1, T op2);
template <typename T>
T FPMin(T a, T b);
T FPDiv(T op1, T op2);
template <typename T>
T FPMax(T a, T b);
template <typename T>
T FPMaxNM(T a, T b);
template <typename T>
T FPMin(T a, T b);
template <typename T>
T FPMinNM(T a, T b);
template <typename T>
T FPMul(T op1, T op2);
template <typename T>
T FPMulAdd(T a, T op1, T op2);
template <typename T>
T FPSqrt(T op);
template <typename T>
T FPSub(T op1, T op2);
// Standard NaN processing.
template <typename T>
T FPProcessNaN(T op);
bool FPProcessNaNs(Instruction* instr);
template <typename T>
T FPProcessNaNs(T op1, T op2);
template <typename T>
T FPProcessNaNs3(T op1, T op2, T op3);
void CheckStackAlignment();
inline void CheckPCSComplianceAndRun();
@ -784,7 +818,6 @@ class Simulator : public DecoderVisitor {
// functions, or to save and restore it when entering and leaving generated
// code.
void AssertSupportedFPCR() {
ASSERT(fpcr().DN() == 0); // No default-NaN support.
ASSERT(fpcr().FZ() == 0); // No flush-to-zero support.
ASSERT(fpcr().RMode() == FPTieEven); // Ties-to-even rounding only.

View File

@ -70,7 +70,7 @@ static inline double rawbits_to_double(uint64_t bits) {
}
// Bits counting.
// Bit counting.
int CountLeadingZeros(uint64_t value, int width);
int CountLeadingSignBits(int64_t value, int width);
int CountTrailingZeros(uint64_t value, int width);
@ -80,9 +80,8 @@ int MaskToBit(uint64_t mask);
// NaN tests.
inline bool IsSignallingNaN(double num) {
const uint64_t kFP64QuietNaNMask = 0x0008000000000000UL;
uint64_t raw = double_to_rawbits(num);
if (std::isnan(num) && ((raw & kFP64QuietNaNMask) == 0)) {
if (std::isnan(num) && ((raw & kDQuietNanMask) == 0)) {
return true;
}
return false;
@ -90,9 +89,8 @@ inline bool IsSignallingNaN(double num) {
inline bool IsSignallingNaN(float num) {
const uint64_t kFP32QuietNaNMask = 0x00400000UL;
uint32_t raw = float_to_rawbits(num);
if (std::isnan(num) && ((raw & kFP32QuietNaNMask) == 0)) {
if (std::isnan(num) && ((raw & kSQuietNanMask) == 0)) {
return true;
}
return false;
@ -104,6 +102,30 @@ inline bool IsQuietNaN(T num) {
return std::isnan(num) && !IsSignallingNaN(num);
}
// Convert the NaN in 'num' to a quiet NaN.
inline double ToQuietNaN(double num) {
ASSERT(isnan(num));
return rawbits_to_double(double_to_rawbits(num) | kDQuietNanMask);
}
inline float ToQuietNaN(float num) {
ASSERT(isnan(num));
return rawbits_to_float(float_to_rawbits(num) | kSQuietNanMask);
}
// Fused multiply-add.
inline double FusedMultiplyAdd(double op1, double op2, double a) {
return fma(op1, op2, a);
}
inline float FusedMultiplyAdd(float op1, float op2, float a) {
return fmaf(op1, op2, a);
}
} } // namespace v8::internal
#endif // V8_A64_UTILS_A64_H_

File diff suppressed because it is too large Load Diff