[wasm-simd] [liftoff] Implement fp min/max on ia32 and x64
Bug: v8:9909 Change-Id: Ib97bcc7afe516a014cd91128aa3c59f1b8b0b0af Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2151999 Commit-Queue: Fanchen Kong <fanchen.kong@intel.com> Reviewed-by: Clemens Backes <clemensb@chromium.org> Reviewed-by: Zhi An Ng <zhin@chromium.org> Cr-Commit-Position: refs/heads/master@{#67301}
This commit is contained in:
parent
ec3cadc41e
commit
9f41a58417
@ -329,6 +329,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
AVX_OP3_XO(Xorpd, xorpd)
|
||||
AVX_OP3_XO(Sqrtss, sqrtss)
|
||||
AVX_OP3_XO(Sqrtsd, sqrtsd)
|
||||
AVX_OP3_XO(Orps, orps)
|
||||
AVX_OP3_XO(Orpd, orpd)
|
||||
AVX_OP3_XO(Andnpd, andnpd)
|
||||
|
||||
@ -351,6 +352,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
AVX_PACKED_OP3_WITH_TYPE(macro_name, name, XMMRegister, Operand)
|
||||
|
||||
AVX_PACKED_OP3(Addpd, addpd)
|
||||
AVX_PACKED_OP3(Subps, subps)
|
||||
AVX_PACKED_OP3(Subpd, subpd)
|
||||
AVX_PACKED_OP3(Mulpd, mulpd)
|
||||
AVX_PACKED_OP3(Divpd, divpd)
|
||||
@ -360,6 +362,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
AVX_PACKED_OP3(Cmplepd, cmplepd)
|
||||
AVX_PACKED_OP3(Minpd, minpd)
|
||||
AVX_PACKED_OP3(Maxpd, maxpd)
|
||||
AVX_PACKED_OP3(Cmpunordps, cmpunordps)
|
||||
AVX_PACKED_OP3(Cmpunordpd, cmpunordpd)
|
||||
AVX_PACKED_OP3(Psllw, psllw)
|
||||
AVX_PACKED_OP3(Pslld, pslld)
|
||||
|
@ -1704,6 +1704,16 @@ void LiftoffAssembler::emit_f64x2_div(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
vdiv(dst.high_fp(), lhs.high_fp(), rhs.high_fp());
|
||||
}
|
||||
|
||||
// f64x2 min hook for this port. No code is emitted and dst/lhs/rhs are unused;
// the only action is a bailout tagged kSimd with the reason "f64x2min".
// NOTE(review): bailout() is defined elsewhere; presumably it makes Liftoff
// give up on the current function -- confirm against its definition.
void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
bailout(kSimd, "f64x2min");
|
||||
}
|
||||
|
||||
// f64x2 max hook for this port. No code is emitted and dst/lhs/rhs are unused;
// the only action is a bailout tagged kSimd with the reason "f64x2max".
// NOTE(review): bailout() is defined elsewhere; presumably it makes Liftoff
// give up on the current function -- confirm against its definition.
void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
bailout(kSimd, "f64x2max");
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
vdup(Neon32, liftoff::GetSimd128Register(dst), src.fp(), 0);
|
||||
@ -1787,6 +1797,16 @@ void LiftoffAssembler::emit_f32x4_div(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
vdiv(dst_high.high(), lhs_high.high(), rhs_high.high());
|
||||
}
|
||||
|
||||
// f32x4 min hook for this port. No code is emitted and dst/lhs/rhs are unused;
// the only action is a bailout tagged kSimd with the reason "f32x4min".
// NOTE(review): bailout() is defined elsewhere; presumably it makes Liftoff
// give up on the current function -- confirm against its definition.
void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
bailout(kSimd, "f32x4min");
|
||||
}
|
||||
|
||||
// f32x4 max hook for this port. No code is emitted and dst/lhs/rhs are unused;
// the only action is a bailout tagged kSimd with the reason "f32x4max".
// NOTE(review): bailout() is defined elsewhere; presumably it makes Liftoff
// give up on the current function -- confirm against its definition.
void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
bailout(kSimd, "f32x4max");
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
Simd128Register dst_simd = liftoff::GetSimd128Register(dst);
|
||||
|
@ -1141,6 +1141,16 @@ void LiftoffAssembler::emit_f64x2_div(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
Fdiv(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
|
||||
}
|
||||
|
||||
// f64x2 min hook for this port. No code is emitted and dst/lhs/rhs are unused;
// the only action is a bailout tagged kSimd with the reason "f64x2min".
// NOTE(review): bailout() is defined elsewhere; presumably it makes Liftoff
// give up on the current function -- confirm against its definition.
void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
bailout(kSimd, "f64x2min");
|
||||
}
|
||||
|
||||
// f64x2 max hook for this port. No code is emitted and dst/lhs/rhs are unused;
// the only action is a bailout tagged kSimd with the reason "f64x2max".
// NOTE(review): bailout() is defined elsewhere; presumably it makes Liftoff
// give up on the current function -- confirm against its definition.
void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
bailout(kSimd, "f64x2max");
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
Dup(dst.fp().V4S(), src.fp().S(), 0);
|
||||
@ -1197,6 +1207,16 @@ void LiftoffAssembler::emit_f32x4_div(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
Fdiv(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
|
||||
}
|
||||
|
||||
// f32x4 min hook for this port. No code is emitted and dst/lhs/rhs are unused;
// the only action is a bailout tagged kSimd with the reason "f32x4min".
// NOTE(review): bailout() is defined elsewhere; presumably it makes Liftoff
// give up on the current function -- confirm against its definition.
void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
bailout(kSimd, "f32x4min");
|
||||
}
|
||||
|
||||
// f32x4 max hook for this port. No code is emitted and dst/lhs/rhs are unused;
// the only action is a bailout tagged kSimd with the reason "f32x4max".
// NOTE(review): bailout() is defined elsewhere; presumably it makes Liftoff
// give up on the current function -- confirm against its definition.
void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
bailout(kSimd, "f32x4max");
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
Dup(dst.fp().V2D(), src.gp().X());
|
||||
|
@ -2506,6 +2506,65 @@ void LiftoffAssembler::emit_f32x4_div(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
this, dst, lhs, rhs);
|
||||
}
|
||||
|
||||
// Emits code computing the lane-wise minimum of the f32x4 values in lhs and
// rhs, leaving the result in dst. liftoff::kScratchDoubleReg is overwritten
// as a temporary. dst may be the same register as lhs or rhs.
void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
// The minps instruction doesn't propagate NaNs and +0's in its first
|
||||
// operand. Perform minps in both orders, merge the results, and adjust.
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
CpuFeatureScope scope(this, AVX);
|
||||
// Three-operand forms: scratch = min(lhs, rhs) is produced first, so both
// inputs are still intact when dst (which may alias one of them) is written.
vminps(liftoff::kScratchDoubleReg, lhs.fp(), rhs.fp());
|
||||
vminps(dst.fp(), rhs.fp(), lhs.fp());
|
||||
} else if (dst.fp() == lhs.fp() || dst.fp() == rhs.fp()) {
|
||||
// dst already holds one input; src is the other one.
XMMRegister src = dst.fp() == lhs.fp() ? rhs.fp() : lhs.fp();
|
||||
// scratch = min(src, dst), then dst = min(dst, src).
movaps(liftoff::kScratchDoubleReg, src);
|
||||
minps(liftoff::kScratchDoubleReg, dst.fp());
|
||||
minps(dst.fp(), src);
|
||||
} else {
|
||||
// dst is distinct from both inputs: scratch = min(lhs, rhs),
// dst = min(rhs, lhs).
movaps(liftoff::kScratchDoubleReg, lhs.fp());
|
||||
minps(liftoff::kScratchDoubleReg, rhs.fp());
|
||||
movaps(dst.fp(), rhs.fp());
|
||||
minps(dst.fp(), lhs.fp());
|
||||
}
|
||||
// Propagate -0's and NaNs, which may be non-canonical, by OR-ing the two
// results together into scratch.
|
||||
Orps(liftoff::kScratchDoubleReg, dst.fp());
|
||||
// Canonicalize NaNs by quieting and clearing the payload.
// dst becomes the unordered-compare mask of dst against the merged value
// (all ones in NaN lanes, per the x86 unordered compare).
|
||||
Cmpunordps(dst.fp(), dst.fp(), liftoff::kScratchDoubleReg);
|
||||
// OR the mask into the merged value.
Orps(liftoff::kScratchDoubleReg, dst.fp());
|
||||
// Shift the mask right by 10 so that only its low 22 bits remain set.
Psrld(dst.fp(), dst.fp(), byte{10});
|
||||
// dst = ~dst & scratch: clears those low bits in masked lanes only.
Andnps(dst.fp(), liftoff::kScratchDoubleReg);
|
||||
}
|
||||
|
||||
// Emits code computing the lane-wise maximum of the f32x4 values in lhs and
// rhs, leaving the result in dst. liftoff::kScratchDoubleReg is overwritten
// as a temporary. dst may be the same register as lhs or rhs.
void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
// The maxps instruction doesn't propagate NaNs and +0's in its first
|
||||
// operand. Perform maxps in both orders, merge the results, and adjust.
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
CpuFeatureScope scope(this, AVX);
|
||||
// Three-operand forms: scratch = max(lhs, rhs) is produced first, so both
// inputs are still intact when dst (which may alias one of them) is written.
vmaxps(liftoff::kScratchDoubleReg, lhs.fp(), rhs.fp());
|
||||
vmaxps(dst.fp(), rhs.fp(), lhs.fp());
|
||||
} else if (dst.fp() == lhs.fp() || dst.fp() == rhs.fp()) {
|
||||
// dst already holds one input; src is the other one.
XMMRegister src = dst.fp() == lhs.fp() ? rhs.fp() : lhs.fp();
|
||||
// scratch = max(src, dst), then dst = max(dst, src).
movaps(liftoff::kScratchDoubleReg, src);
|
||||
maxps(liftoff::kScratchDoubleReg, dst.fp());
|
||||
maxps(dst.fp(), src);
|
||||
} else {
|
||||
// dst is distinct from both inputs: scratch = max(lhs, rhs),
// dst = max(rhs, lhs).
movaps(liftoff::kScratchDoubleReg, lhs.fp());
|
||||
maxps(liftoff::kScratchDoubleReg, rhs.fp());
|
||||
movaps(dst.fp(), rhs.fp());
|
||||
maxps(dst.fp(), lhs.fp());
|
||||
}
|
||||
// Find discrepancies: dst = bits in which the two results differ.
|
||||
Xorps(dst.fp(), liftoff::kScratchDoubleReg);
|
||||
// Propagate NaNs, which may be non-canonical.
|
||||
Orps(liftoff::kScratchDoubleReg, dst.fp());
|
||||
// Propagate sign discrepancy and (subtle) quiet NaNs.
|
||||
Subps(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, dst.fp());
|
||||
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
// dst becomes the unordered-compare mask of dst against scratch
// (all ones in NaN lanes, per the x86 unordered compare).
|
||||
Cmpunordps(dst.fp(), dst.fp(), liftoff::kScratchDoubleReg);
|
||||
// Shift the mask right by 10 so that only its low 22 bits remain set.
Psrld(dst.fp(), dst.fp(), byte{10});
|
||||
// dst = ~dst & scratch: clears those low bits in masked lanes only.
Andnps(dst.fp(), liftoff::kScratchDoubleReg);
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
if (dst.fp() == src.fp()) {
|
||||
@ -2561,6 +2620,65 @@ void LiftoffAssembler::emit_f64x2_div(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
this, dst, lhs, rhs);
|
||||
}
|
||||
|
||||
// Emits code computing the lane-wise minimum of the f64x2 values in lhs and
// rhs, leaving the result in dst. liftoff::kScratchDoubleReg is overwritten
// as a temporary. dst may be the same register as lhs or rhs.
void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
// The minpd instruction doesn't propagate NaNs and +0's in its first
|
||||
// operand. Perform minpd in both orders, merge the results, and adjust.
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
CpuFeatureScope scope(this, AVX);
|
||||
// Three-operand forms: scratch = min(lhs, rhs) is produced first, so both
// inputs are still intact when dst (which may alias one of them) is written.
vminpd(liftoff::kScratchDoubleReg, lhs.fp(), rhs.fp());
|
||||
vminpd(dst.fp(), rhs.fp(), lhs.fp());
|
||||
} else if (dst.fp() == lhs.fp() || dst.fp() == rhs.fp()) {
|
||||
// dst already holds one input; src is the other one.
XMMRegister src = dst.fp() == lhs.fp() ? rhs.fp() : lhs.fp();
|
||||
// scratch = min(src, dst), then dst = min(dst, src).
movapd(liftoff::kScratchDoubleReg, src);
|
||||
minpd(liftoff::kScratchDoubleReg, dst.fp());
|
||||
minpd(dst.fp(), src);
|
||||
} else {
|
||||
// dst is distinct from both inputs: scratch = min(lhs, rhs),
// dst = min(rhs, lhs).
movapd(liftoff::kScratchDoubleReg, lhs.fp());
|
||||
minpd(liftoff::kScratchDoubleReg, rhs.fp());
|
||||
movapd(dst.fp(), rhs.fp());
|
||||
minpd(dst.fp(), lhs.fp());
|
||||
}
|
||||
// Propagate -0's and NaNs, which may be non-canonical, by OR-ing the two
// results together into scratch.
|
||||
Orpd(liftoff::kScratchDoubleReg, dst.fp());
|
||||
// Canonicalize NaNs by quieting and clearing the payload.
// dst becomes the unordered-compare mask of dst against the merged value
// (all ones in NaN lanes, per the x86 unordered compare).
|
||||
Cmpunordpd(dst.fp(), dst.fp(), liftoff::kScratchDoubleReg);
|
||||
// OR the mask into the merged value.
Orpd(liftoff::kScratchDoubleReg, dst.fp());
|
||||
// Shift the mask right by 13 so that only its low 51 bits remain set.
Psrlq(dst.fp(), 13);
|
||||
// dst = ~dst & scratch: clears those low bits in masked lanes only.
Andnpd(dst.fp(), liftoff::kScratchDoubleReg);
|
||||
}
|
||||
|
||||
// Emits code computing the lane-wise maximum of the f64x2 values in lhs and
// rhs, leaving the result in dst. liftoff::kScratchDoubleReg is overwritten
// as a temporary. dst may be the same register as lhs or rhs.
void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
// The maxpd instruction doesn't propagate NaNs and +0's in its first
|
||||
// operand. Perform maxpd in both orders, merge the results, and adjust.
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
CpuFeatureScope scope(this, AVX);
|
||||
// Three-operand forms: scratch = max(lhs, rhs) is produced first, so both
// inputs are still intact when dst (which may alias one of them) is written.
vmaxpd(liftoff::kScratchDoubleReg, lhs.fp(), rhs.fp());
|
||||
vmaxpd(dst.fp(), rhs.fp(), lhs.fp());
|
||||
} else if (dst.fp() == lhs.fp() || dst.fp() == rhs.fp()) {
|
||||
// dst already holds one input; src is the other one.
XMMRegister src = dst.fp() == lhs.fp() ? rhs.fp() : lhs.fp();
|
||||
// scratch = max(src, dst), then dst = max(dst, src).
movapd(liftoff::kScratchDoubleReg, src);
|
||||
maxpd(liftoff::kScratchDoubleReg, dst.fp());
|
||||
maxpd(dst.fp(), src);
|
||||
} else {
|
||||
// dst is distinct from both inputs: scratch = max(lhs, rhs),
// dst = max(rhs, lhs).
movapd(liftoff::kScratchDoubleReg, lhs.fp());
|
||||
maxpd(liftoff::kScratchDoubleReg, rhs.fp());
|
||||
movapd(dst.fp(), rhs.fp());
|
||||
maxpd(dst.fp(), lhs.fp());
|
||||
}
|
||||
// Find discrepancies: dst = bits in which the two results differ.
|
||||
Xorpd(dst.fp(), liftoff::kScratchDoubleReg);
|
||||
// Propagate NaNs, which may be non-canonical.
|
||||
Orpd(liftoff::kScratchDoubleReg, dst.fp());
|
||||
// Propagate sign discrepancy and (subtle) quiet NaNs.
|
||||
Subpd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, dst.fp());
|
||||
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
// dst becomes the unordered-compare mask of dst against scratch
// (all ones in NaN lanes, per the x86 unordered compare).
|
||||
Cmpunordpd(dst.fp(), dst.fp(), liftoff::kScratchDoubleReg);
|
||||
// Shift the mask right by 13 so that only its low 51 bits remain set.
Psrlq(dst.fp(), 13);
|
||||
// dst = ~dst & scratch: clears those low bits in masked lanes only.
Andnpd(dst.fp(), liftoff::kScratchDoubleReg);
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst,
|
||||
LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
|
@ -843,6 +843,10 @@ class LiftoffAssembler : public TurboAssembler {
|
||||
LiftoffRegister rhs);
|
||||
inline void emit_f32x4_div(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs);
|
||||
inline void emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs);
|
||||
inline void emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs);
|
||||
inline void emit_f64x2_abs(LiftoffRegister dst, LiftoffRegister src);
|
||||
inline void emit_f64x2_neg(LiftoffRegister dst, LiftoffRegister src);
|
||||
inline void emit_f64x2_sqrt(LiftoffRegister dst, LiftoffRegister src);
|
||||
@ -854,6 +858,10 @@ class LiftoffAssembler : public TurboAssembler {
|
||||
LiftoffRegister rhs);
|
||||
inline void emit_f64x2_div(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs);
|
||||
inline void emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs);
|
||||
inline void emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs);
|
||||
inline void emit_i8x16_sconvert_i16x8(LiftoffRegister dst,
|
||||
LiftoffRegister lhs,
|
||||
LiftoffRegister rhs);
|
||||
|
@ -2464,6 +2464,10 @@ class LiftoffCompiler {
|
||||
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f32x4_mul);
|
||||
case wasm::kExprF32x4Div:
|
||||
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f32x4_div);
|
||||
case wasm::kExprF32x4Min:
|
||||
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f32x4_min);
|
||||
case wasm::kExprF32x4Max:
|
||||
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f32x4_max);
|
||||
case wasm::kExprF64x2Abs:
|
||||
return EmitUnOp<kS128, kS128>(&LiftoffAssembler::emit_f64x2_abs);
|
||||
case wasm::kExprF64x2Neg:
|
||||
@ -2478,6 +2482,10 @@ class LiftoffCompiler {
|
||||
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f64x2_mul);
|
||||
case wasm::kExprF64x2Div:
|
||||
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f64x2_div);
|
||||
case wasm::kExprF64x2Min:
|
||||
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f64x2_min);
|
||||
case wasm::kExprF64x2Max:
|
||||
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f64x2_max);
|
||||
case wasm::kExprI8x16SConvertI16x8:
|
||||
return EmitBinOp<kS128, kS128>(
|
||||
&LiftoffAssembler::emit_i8x16_sconvert_i16x8);
|
||||
|
@ -2468,6 +2468,65 @@ void LiftoffAssembler::emit_f32x4_div(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
this, dst, lhs, rhs);
|
||||
}
|
||||
|
||||
// Emits code computing the lane-wise minimum of the f32x4 values in lhs and
// rhs, leaving the result in dst. kScratchDoubleReg is overwritten as a
// temporary. dst may be the same register as lhs or rhs.
void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
// The minps instruction doesn't propagate NaNs and +0's in its first
|
||||
// operand. Perform minps in both orders, merge the results, and adjust.
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
CpuFeatureScope scope(this, AVX);
|
||||
// Three-operand forms: scratch = min(lhs, rhs) is produced first, so both
// inputs are still intact when dst (which may alias one of them) is written.
vminps(kScratchDoubleReg, lhs.fp(), rhs.fp());
|
||||
vminps(dst.fp(), rhs.fp(), lhs.fp());
|
||||
} else if (dst.fp() == lhs.fp() || dst.fp() == rhs.fp()) {
|
||||
// dst already holds one input; src is the other one.
XMMRegister src = dst.fp() == lhs.fp() ? rhs.fp() : lhs.fp();
|
||||
// scratch = min(src, dst), then dst = min(dst, src).
movaps(kScratchDoubleReg, src);
|
||||
minps(kScratchDoubleReg, dst.fp());
|
||||
minps(dst.fp(), src);
|
||||
} else {
|
||||
// dst is distinct from both inputs: scratch = min(lhs, rhs),
// dst = min(rhs, lhs).
movaps(kScratchDoubleReg, lhs.fp());
|
||||
minps(kScratchDoubleReg, rhs.fp());
|
||||
movaps(dst.fp(), rhs.fp());
|
||||
minps(dst.fp(), lhs.fp());
|
||||
}
|
||||
// Propagate -0's and NaNs, which may be non-canonical, by OR-ing the two
// results together into scratch.
|
||||
Orps(kScratchDoubleReg, dst.fp());
|
||||
// Canonicalize NaNs by quieting and clearing the payload.
// Predicate 3 is the x86 CMPPS "unordered" comparison: dst becomes a mask
// that is all ones in lanes where dst or the merged value is NaN.
|
||||
Cmpps(dst.fp(), kScratchDoubleReg, int8_t{3});
|
||||
// OR the mask into the merged value.
Orps(kScratchDoubleReg, dst.fp());
|
||||
// Shift the mask right by 10 so that only its low 22 bits remain set.
Psrld(dst.fp(), byte{10});
|
||||
// dst = ~dst & scratch: clears those low bits in masked lanes only.
Andnps(dst.fp(), kScratchDoubleReg);
|
||||
}
|
||||
|
||||
// Emits code computing the lane-wise maximum of the f32x4 values in lhs and
// rhs, leaving the result in dst. kScratchDoubleReg is overwritten as a
// temporary. dst may be the same register as lhs or rhs.
void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
// The maxps instruction doesn't propagate NaNs and +0's in its first
|
||||
// operand. Perform maxps in both orders, merge the results, and adjust.
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
CpuFeatureScope scope(this, AVX);
|
||||
// Three-operand forms: scratch = max(lhs, rhs) is produced first, so both
// inputs are still intact when dst (which may alias one of them) is written.
vmaxps(kScratchDoubleReg, lhs.fp(), rhs.fp());
|
||||
vmaxps(dst.fp(), rhs.fp(), lhs.fp());
|
||||
} else if (dst.fp() == lhs.fp() || dst.fp() == rhs.fp()) {
|
||||
// dst already holds one input; src is the other one.
XMMRegister src = dst.fp() == lhs.fp() ? rhs.fp() : lhs.fp();
|
||||
// scratch = max(src, dst), then dst = max(dst, src).
movaps(kScratchDoubleReg, src);
|
||||
maxps(kScratchDoubleReg, dst.fp());
|
||||
maxps(dst.fp(), src);
|
||||
} else {
|
||||
// dst is distinct from both inputs: scratch = max(lhs, rhs),
// dst = max(rhs, lhs).
movaps(kScratchDoubleReg, lhs.fp());
|
||||
maxps(kScratchDoubleReg, rhs.fp());
|
||||
movaps(dst.fp(), rhs.fp());
|
||||
maxps(dst.fp(), lhs.fp());
|
||||
}
|
||||
// Find discrepancies: dst = bits in which the two results differ.
|
||||
Xorps(dst.fp(), kScratchDoubleReg);
|
||||
// Propagate NaNs, which may be non-canonical.
|
||||
Orps(kScratchDoubleReg, dst.fp());
|
||||
// Propagate sign discrepancy and (subtle) quiet NaNs.
|
||||
Subps(kScratchDoubleReg, dst.fp());
|
||||
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
// Predicate 3 is the x86 CMPPS "unordered" comparison: dst becomes a mask
// that is all ones in lanes where dst or scratch is NaN.
|
||||
Cmpps(dst.fp(), kScratchDoubleReg, int8_t{3});
|
||||
// Shift the mask right by 10 so that only its low 22 bits remain set.
Psrld(dst.fp(), byte{10});
|
||||
// dst = ~dst & scratch: clears those low bits in masked lanes only.
Andnps(dst.fp(), kScratchDoubleReg);
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
if (dst.fp() == src.fp()) {
|
||||
@ -2523,6 +2582,65 @@ void LiftoffAssembler::emit_f64x2_div(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
this, dst, lhs, rhs);
|
||||
}
|
||||
|
||||
// Emits code computing the lane-wise minimum of the f64x2 values in lhs and
// rhs, leaving the result in dst. kScratchDoubleReg is overwritten as a
// temporary. dst may be the same register as lhs or rhs.
void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
// The minpd instruction doesn't propagate NaNs and +0's in its first
|
||||
// operand. Perform minpd in both orders, merge the results, and adjust.
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
CpuFeatureScope scope(this, AVX);
|
||||
// Three-operand forms: scratch = min(lhs, rhs) is produced first, so both
// inputs are still intact when dst (which may alias one of them) is written.
vminpd(kScratchDoubleReg, lhs.fp(), rhs.fp());
|
||||
vminpd(dst.fp(), rhs.fp(), lhs.fp());
|
||||
} else if (dst.fp() == lhs.fp() || dst.fp() == rhs.fp()) {
|
||||
// dst already holds one input; src is the other one.
XMMRegister src = dst.fp() == lhs.fp() ? rhs.fp() : lhs.fp();
|
||||
// scratch = min(src, dst), then dst = min(dst, src).
movapd(kScratchDoubleReg, src);
|
||||
minpd(kScratchDoubleReg, dst.fp());
|
||||
minpd(dst.fp(), src);
|
||||
} else {
|
||||
// dst is distinct from both inputs: scratch = min(lhs, rhs),
// dst = min(rhs, lhs).
movapd(kScratchDoubleReg, lhs.fp());
|
||||
minpd(kScratchDoubleReg, rhs.fp());
|
||||
movapd(dst.fp(), rhs.fp());
|
||||
minpd(dst.fp(), lhs.fp());
|
||||
}
|
||||
// Propagate -0's and NaNs, which may be non-canonical, by OR-ing the two
// results together into scratch.
|
||||
Orpd(kScratchDoubleReg, dst.fp());
|
||||
// Canonicalize NaNs by quieting and clearing the payload.
// Predicate 3 is the x86 CMPPD "unordered" comparison: dst becomes a mask
// that is all ones in lanes where dst or the merged value is NaN.
|
||||
Cmppd(dst.fp(), kScratchDoubleReg, int8_t{3});
|
||||
// OR the mask into the merged value.
Orpd(kScratchDoubleReg, dst.fp());
|
||||
// Shift the mask right by 13 so that only its low 51 bits remain set.
Psrlq(dst.fp(), 13);
|
||||
// dst = ~dst & scratch: clears those low bits in masked lanes only.
Andnpd(dst.fp(), kScratchDoubleReg);
|
||||
}
|
||||
|
||||
// Emits code computing the lane-wise maximum of the f64x2 values in lhs and
// rhs, leaving the result in dst. kScratchDoubleReg is overwritten as a
// temporary. dst may be the same register as lhs or rhs.
void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
// The maxpd instruction doesn't propagate NaNs and +0's in its first
|
||||
// operand. Perform maxpd in both orders, merge the results, and adjust.
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
CpuFeatureScope scope(this, AVX);
|
||||
// Three-operand forms: scratch = max(lhs, rhs) is produced first, so both
// inputs are still intact when dst (which may alias one of them) is written.
vmaxpd(kScratchDoubleReg, lhs.fp(), rhs.fp());
|
||||
vmaxpd(dst.fp(), rhs.fp(), lhs.fp());
|
||||
} else if (dst.fp() == lhs.fp() || dst.fp() == rhs.fp()) {
|
||||
// dst already holds one input; src is the other one.
XMMRegister src = dst.fp() == lhs.fp() ? rhs.fp() : lhs.fp();
|
||||
// scratch = max(src, dst), then dst = max(dst, src).
movapd(kScratchDoubleReg, src);
|
||||
maxpd(kScratchDoubleReg, dst.fp());
|
||||
maxpd(dst.fp(), src);
|
||||
} else {
|
||||
// dst is distinct from both inputs: scratch = max(lhs, rhs),
// dst = max(rhs, lhs).
movapd(kScratchDoubleReg, lhs.fp());
|
||||
maxpd(kScratchDoubleReg, rhs.fp());
|
||||
movapd(dst.fp(), rhs.fp());
|
||||
maxpd(dst.fp(), lhs.fp());
|
||||
}
|
||||
// Find discrepancies: dst = bits in which the two results differ.
|
||||
Xorpd(dst.fp(), kScratchDoubleReg);
|
||||
// Propagate NaNs, which may be non-canonical.
|
||||
Orpd(kScratchDoubleReg, dst.fp());
|
||||
// Propagate sign discrepancy and (subtle) quiet NaNs.
|
||||
Subpd(kScratchDoubleReg, dst.fp());
|
||||
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
// Predicate 3 is the x86 CMPPD "unordered" comparison: dst becomes a mask
// that is all ones in lanes where dst or scratch is NaN.
|
||||
Cmppd(dst.fp(), kScratchDoubleReg, int8_t{3});
|
||||
// Shift the mask right by 13 so that only its low 51 bits remain set.
Psrlq(dst.fp(), 13);
|
||||
// dst = ~dst & scratch: clears those low bits in masked lanes only.
Andnpd(dst.fp(), kScratchDoubleReg);
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst,
|
||||
LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
|
Loading…
Reference in New Issue
Block a user