[wasm simd] Make F32x4Min/Max IEEE 754 compliant

- Changes min and max sequences to propagate NaNs and signed
  zeroes.

- Note that NaN propagation must preserve canonical NaNs. This is
  achieved by always returning canonical NaNs. This is also
  consistent with the WebAssembly scalar math spec.

Bug: v8:8639
Change-Id: I04fdefabc54ea60f4d02e2081c32444a02dd6a83
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1524634
Commit-Queue: Bill Budge <bbudge@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/master@{#60414}
This commit is contained in:
Bill Budge 2019-03-21 13:30:49 -07:00 committed by Commit Bot
parent 41a4651af6
commit aa706218d6
14 changed files with 155 additions and 64 deletions

View File

@ -19,6 +19,7 @@
#include "src/objects-inl.h"
#include "src/ostreams.h"
#include "src/runtime/runtime-utils.h"
#include "src/utils.h"
#include "src/vector.h"
// Only build the simulator if not compiling for real ARM hardware.
@ -4181,10 +4182,8 @@ void CompareGreater(Simulator* simulator, int Vd, int Vm, int Vn, bool ge) {
}
float MinMax(float a, float b, bool is_min) {
if (std::isnan(a) || std::isnan(b)) return NAN;
return is_min ? fmin(a, b) : fmax(a, b);
return is_min ? JSMin(a, b) : JSMax(a, b);
}
template <typename T>
T MinMax(T a, T b, bool is_min) {
return is_min ? std::min(a, b) : std::max(a, b);

View File

@ -1952,48 +1952,73 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kSSEF32x4Min: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
// minps doesn't propagate NaN lanes in the first source. Compare this
// with itself to generate 1's in those lanes (quiet NaNs) and or them
// with the result of minps to simulate NaN propagation.
__ movaps(kScratchDoubleReg, i.InputSimd128Register(0));
__ cmpps(kScratchDoubleReg, kScratchDoubleReg, 0x4);
__ minps(i.OutputSimd128Register(), i.InputSimd128Register(1));
__ orps(i.OutputSimd128Register(), kScratchDoubleReg);
XMMRegister src1 = i.InputSimd128Register(1),
dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
// The minps instruction doesn't propagate NaNs and +0's in its first
// operand. Perform minps in both orders, merge the resuls, and adjust.
__ movaps(kScratchDoubleReg, src1);
__ minps(kScratchDoubleReg, dst);
__ minps(dst, src1);
// propagate -0's and NaNs, which may be non-canonical.
__ orps(dst, kScratchDoubleReg);
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
__ movaps(kScratchDoubleReg, dst);
__ cmpps(dst, dst, 4);
__ psrld(dst, 10);
__ andnps(dst, kScratchDoubleReg);
break;
}
case kAVXF32x4Min: {
CpuFeatureScope avx_scope(tasm(), AVX);
// See comment above for minps and NaN propagation.
__ vcmpneqps(kScratchDoubleReg, i.InputSimd128Register(0),
i.InputSimd128Register(0)); // Is NaN?
__ vminps(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
__ vorps(i.OutputSimd128Register(), i.OutputSimd128Register(),
kScratchDoubleReg); // re-NaN-imate.
XMMRegister dst = i.OutputSimd128Register();
Operand src1 = i.InputOperand(1);
// See comment above for correction of minps.
__ movups(kScratchDoubleReg, src1);
__ vminps(kScratchDoubleReg, kScratchDoubleReg, dst);
__ vminps(dst, dst, src1);
__ vorps(dst, dst, kScratchDoubleReg);
__ vcmpneqps(kScratchDoubleReg, dst, dst);
__ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 10);
__ vandnps(dst, kScratchDoubleReg, dst);
break;
}
case kSSEF32x4Max: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
// maxps doesn't propagate NaN lanes in the first source. Compare this
// with itself to generate 1's in those lanes (quiet NaNs) and or them
// with the result of maxps to simulate NaN propagation.
__ movaps(kScratchDoubleReg, i.InputSimd128Register(0));
__ cmpps(kScratchDoubleReg, kScratchDoubleReg, 0x4);
__ maxps(i.OutputSimd128Register(), i.InputSimd128Register(1));
__ orps(i.OutputSimd128Register(), kScratchDoubleReg);
XMMRegister src1 = i.InputSimd128Register(1),
dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
// The maxps instruction doesn't propagate NaNs and +0's in its first
// operand. Perform maxps in both orders, merge the resuls, and adjust.
__ movaps(kScratchDoubleReg, src1);
__ maxps(kScratchDoubleReg, dst);
__ maxps(dst, src1);
// Find discrepancies.
__ xorps(kScratchDoubleReg, dst);
// Propagate NaNs, which may be non-canonical.
__ orps(dst, kScratchDoubleReg);
// Propagate sign discrepancy. NaNs and correct lanes are preserved.
__ subps(dst, kScratchDoubleReg);
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
__ movaps(kScratchDoubleReg, dst);
__ cmpps(dst, dst, 4);
__ psrld(dst, 10);
__ andnps(dst, kScratchDoubleReg);
break;
}
case kAVXF32x4Max: {
CpuFeatureScope avx_scope(tasm(), AVX);
// See comment above for maxps and NaN propagation.
__ vcmpneqps(kScratchDoubleReg, i.InputSimd128Register(0),
i.InputSimd128Register(0));
__ vmaxps(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
__ vorps(i.OutputSimd128Register(), i.OutputSimd128Register(),
kScratchDoubleReg);
XMMRegister dst = i.OutputSimd128Register();
Operand src1 = i.InputOperand(1);
// See comment above for correction of maxps.
__ movaps(kScratchDoubleReg, src1);
__ vmaxps(kScratchDoubleReg, kScratchDoubleReg, dst);
__ vmaxps(dst, dst, src1);
__ vxorps(kScratchDoubleReg, kScratchDoubleReg, dst);
__ vorps(dst, dst, kScratchDoubleReg);
__ vsubps(dst, dst, kScratchDoubleReg);
__ vcmpneqps(kScratchDoubleReg, dst, dst);
__ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 10);
__ vandnps(dst, kScratchDoubleReg, dst);
break;
}
case kSSEF32x4Eq: {

View File

@ -2347,25 +2347,43 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64F32x4Min: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
// minps doesn't propagate NaN lanes in the first source. Compare this
// with itself to generate 1's in those lanes (quiet NaNs) and or them
// with the result of minps to simulate NaN propagation.
__ movaps(kScratchDoubleReg, i.InputSimd128Register(0));
__ cmpps(kScratchDoubleReg, kScratchDoubleReg, 0x4);
__ minps(i.OutputSimd128Register(), i.InputSimd128Register(1));
__ orps(i.OutputSimd128Register(), kScratchDoubleReg);
XMMRegister src1 = i.InputSimd128Register(1),
dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
// The minps instruction doesn't propagate NaNs and +0's in its first
// operand. Perform minps in both orders, merge the resuls, and adjust.
__ movaps(kScratchDoubleReg, src1);
__ minps(kScratchDoubleReg, dst);
__ minps(dst, src1);
// propagate -0's and NaNs, which may be non-canonical.
__ orps(dst, kScratchDoubleReg);
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
__ movaps(kScratchDoubleReg, dst);
__ cmpps(dst, dst, 4);
__ psrld(dst, 10);
__ andnps(dst, kScratchDoubleReg);
break;
}
case kX64F32x4Max: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
// maxps doesn't propagate NaN lanes in the first source. Compare this
// with itself to generate 1's in those lanes (quiet NaNs) and or them
// with the result of maxps to simulate NaN propagation.
__ movaps(kScratchDoubleReg, i.InputSimd128Register(0));
__ cmpps(kScratchDoubleReg, kScratchDoubleReg, 0x4);
__ maxps(i.OutputSimd128Register(), i.InputSimd128Register(1));
__ orps(i.OutputSimd128Register(), kScratchDoubleReg);
XMMRegister src1 = i.InputSimd128Register(1),
dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
// The maxps instruction doesn't propagate NaNs and +0's in its first
// operand. Perform maxps in both orders, merge the resuls, and adjust.
__ movaps(kScratchDoubleReg, src1);
__ maxps(kScratchDoubleReg, dst);
__ maxps(dst, src1);
// Find discrepancies.
__ xorps(kScratchDoubleReg, dst);
// Propagate NaNs, which may be non-canonical.
__ orps(dst, kScratchDoubleReg);
// Propagate sign discrepancy. NaNs and correct lanes are preserved.
__ subps(dst, kScratchDoubleReg);
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
__ movaps(kScratchDoubleReg, dst);
__ cmpps(dst, dst, 4);
__ psrld(dst, 10);
__ andnps(dst, kScratchDoubleReg);
break;
}
case kX64F32x4Eq: {

View File

@ -2280,6 +2280,13 @@ void Assembler::andps(XMMRegister dst, Operand src) {
emit_sse_operand(dst, src);
}
void Assembler::andnps(XMMRegister dst, Operand src) {
EnsureSpace ensure_space(this);
EMIT(0x0F);
EMIT(0x55);
emit_sse_operand(dst, src);
}
void Assembler::orps(XMMRegister dst, Operand src) {
EnsureSpace ensure_space(this);
EMIT(0x0F);
@ -2468,21 +2475,13 @@ void Assembler::cmpltsd(XMMRegister dst, XMMRegister src) {
EMIT(1); // LT == 1
}
void Assembler::movaps(XMMRegister dst, XMMRegister src) {
void Assembler::movaps(XMMRegister dst, Operand src) {
EnsureSpace ensure_space(this);
EMIT(0x0F);
EMIT(0x28);
emit_sse_operand(dst, src);
}
void Assembler::movups(XMMRegister dst, XMMRegister src) {
EnsureSpace ensure_space(this);
EMIT(0x0F);
EMIT(0x10);
emit_sse_operand(dst, src);
}
void Assembler::movups(XMMRegister dst, Operand src) {
EnsureSpace ensure_space(this);
EMIT(0x0F);

View File

@ -856,8 +856,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void ucomiss(XMMRegister dst, XMMRegister src) { ucomiss(dst, Operand(src)); }
void ucomiss(XMMRegister dst, Operand src);
void movaps(XMMRegister dst, XMMRegister src);
void movups(XMMRegister dst, XMMRegister src);
void movaps(XMMRegister dst, XMMRegister src) { movaps(dst, Operand(src)); }
void movaps(XMMRegister dst, Operand src);
void movups(XMMRegister dst, XMMRegister src) { movups(dst, Operand(src)); }
void movups(XMMRegister dst, Operand src);
void movups(Operand dst, XMMRegister src);
void shufps(XMMRegister dst, XMMRegister src, byte imm8);
@ -869,6 +870,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void andps(XMMRegister dst, Operand src);
void andps(XMMRegister dst, XMMRegister src) { andps(dst, Operand(src)); }
void andnps(XMMRegister dst, Operand src);
void andnps(XMMRegister dst, XMMRegister src) { andnps(dst, Operand(src)); }
void xorps(XMMRegister dst, Operand src);
void xorps(XMMRegister dst, XMMRegister src) { xorps(dst, Operand(src)); }
void orps(XMMRegister dst, Operand src);
@ -1320,9 +1323,10 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void vhaddps(XMMRegister dst, XMMRegister src1, Operand src2) {
vinstr(0x7C, dst, src1, src2, kF2, k0F, kWIG);
}
void vmovaps(XMMRegister dst, XMMRegister src) {
vps(0x28, dst, xmm0, Operand(src));
}
void vmovaps(XMMRegister dst, XMMRegister src) { vmovaps(dst, Operand(src)); }
void vmovaps(XMMRegister dst, Operand src) { vps(0x28, dst, xmm0, src); }
void vmovups(XMMRegister dst, XMMRegister src) { vmovups(dst, Operand(src)); }
void vmovups(XMMRegister dst, Operand src) { vps(0x10, dst, xmm0, src); }
void vshufps(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8) {
vshufps(dst, src1, Operand(src2), imm8);
}
@ -1500,6 +1504,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
#define PACKED_OP_LIST(V) \
V(and, 0x54) \
V(andn, 0x55) \
V(or, 0x56) \
V(xor, 0x57) \
V(add, 0x58) \

View File

@ -1119,6 +1119,11 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x55:
AppendToBuffer("vandnps %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x57:
AppendToBuffer("vxorps %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));

View File

@ -282,6 +282,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP3_XO(Punpckhbw, punpckhbw)
AVX_OP3_XO(Pxor, pxor)
AVX_OP3_XO(Andps, andps)
AVX_OP3_XO(Andnps, andnps)
AVX_OP3_XO(Andpd, andpd)
AVX_OP3_XO(Xorps, xorps)
AVX_OP3_XO(Xorpd, xorpd)

View File

@ -2829,6 +2829,21 @@ void Assembler::andps(XMMRegister dst, Operand src) {
emit_sse_operand(dst, src);
}
void Assembler::andnps(XMMRegister dst, XMMRegister src) {
EnsureSpace ensure_space(this);
emit_optional_rex_32(dst, src);
emit(0x0F);
emit(0x55);
emit_sse_operand(dst, src);
}
void Assembler::andnps(XMMRegister dst, Operand src) {
EnsureSpace ensure_space(this);
emit_optional_rex_32(dst, src);
emit(0x0F);
emit(0x55);
emit_sse_operand(dst, src);
}
void Assembler::orps(XMMRegister dst, XMMRegister src) {
EnsureSpace ensure_space(this);

View File

@ -872,6 +872,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void andps(XMMRegister dst, XMMRegister src);
void andps(XMMRegister dst, Operand src);
void andnps(XMMRegister dst, XMMRegister src);
void andnps(XMMRegister dst, Operand src);
void orps(XMMRegister dst, XMMRegister src);
void orps(XMMRegister dst, Operand src);
void xorps(XMMRegister dst, XMMRegister src);
@ -1320,6 +1322,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
AVX_SP_3(vmin, 0x5d)
AVX_SP_3(vmax, 0x5f)
AVX_P_3(vand, 0x54)
AVX_P_3(vandn, 0x55)
AVX_P_3(vor, 0x56)
AVX_P_3(vxor, 0x57)
AVX_3(vcvtsd2ss, 0x5a, vsd)

View File

@ -1313,6 +1313,11 @@ int DisassemblerX64::AVXInstruction(byte* data) {
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x55:
AppendToBuffer("vandnps %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x57:
AppendToBuffer("vxorps %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));

View File

@ -147,6 +147,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP(Addsd, addsd)
AVX_OP(Mulsd, mulsd)
AVX_OP(Andps, andps)
AVX_OP(Andnps, andnps)
AVX_OP(Andpd, andpd)
AVX_OP(Orpd, orpd)
AVX_OP(Cmpeqps, cmpeqps)

View File

@ -399,6 +399,8 @@ TEST(DisasmIa320) {
// logic operation
__ andps(xmm0, xmm1);
__ andps(xmm0, Operand(ebx, ecx, times_4, 10000));
__ andnps(xmm0, xmm1);
__ andnps(xmm0, Operand(ebx, ecx, times_4, 10000));
__ orps(xmm0, xmm1);
__ orps(xmm0, Operand(ebx, ecx, times_4, 10000));
__ xorps(xmm0, xmm1);
@ -619,6 +621,8 @@ TEST(DisasmIa320) {
__ vandps(xmm0, xmm1, xmm2);
__ vandps(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vandnps(xmm0, xmm1, xmm2);
__ vandnps(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vxorps(xmm0, xmm1, xmm2);
__ vxorps(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vaddps(xmm0, xmm1, xmm2);

View File

@ -395,6 +395,8 @@ TEST(DisasmX64) {
// logic operation
__ andps(xmm0, xmm1);
__ andps(xmm0, Operand(rbx, rcx, times_4, 10000));
__ andnps(xmm0, xmm1);
__ andnps(xmm0, Operand(rbx, rcx, times_4, 10000));
__ orps(xmm0, xmm1);
__ orps(xmm0, Operand(rbx, rcx, times_4, 10000));
__ xorps(xmm0, xmm1);
@ -684,6 +686,8 @@ TEST(DisasmX64) {
__ vandps(xmm0, xmm9, xmm2);
__ vandps(xmm9, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vandnps(xmm0, xmm9, xmm2);
__ vandnps(xmm9, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vxorps(xmm0, xmm1, xmm9);
__ vxorps(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vhaddps(xmm0, xmm1, xmm9);

View File

@ -520,8 +520,15 @@ void RunF32x4BinOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
float actual = ReadLittleEndianValue<float>(&g[i]);
if (std::isnan(expected)) {
CHECK(std::isnan(actual));
// Finally, check that NaN outputs are canonical. The sign bit is
// allowed to be non-deterministic.
uint32_t expected_bits = bit_cast<uint32_t>(expected) & ~0x80000000;
uint32_t actual_bits = bit_cast<uint32_t>(actual) & ~0x80000000;
CHECK_EQ(expected_bits, actual_bits);
} else {
CHECK_EQ(expected, actual);
// The sign of 0's must match.
CHECK_EQ(std::signbit(expected), std::signbit(actual));
}
}
}