[wasm-simd][liftoff][ia32][x64] Implement pmin pmax
Implement f32x4.pmin, f32x4.pmax, f64x2.pmin, and f64x2.pmax. Due to the way minps/maxps/minpd/maxpd work [0], we swap lhs and rhs. This is similar to the way TurboFan does this [1]. [0] "If the values being compared are both 0.0s (of either sign), the value in the second operand (source operand) is returned." but pmin/pmax want to return the lhs (which follows the definition of std::min<T> in the C++ STL). [1] https://source.chromium.org/search?q=instruction-selector-ia32.cc%20VisitPmin&ss=chromium%2Fchromium%2Fsrc:v8%2F Bug: v8:10904 Change-Id: Ie58cae66cd48421c3ab40df33df979b0353b01ee Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2406593 Reviewed-by: Clemens Backes <clemensb@chromium.org> Commit-Queue: Zhi An Ng <zhin@chromium.org> Cr-Commit-Position: refs/heads/master@{#69889}
This commit is contained in:
parent
383c4a44cd
commit
863c2cb4eb
@ -2357,6 +2357,16 @@ void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
liftoff::MinOrMax::kMax);
|
||||
}
|
||||
|
||||
// f64x2.pmin stub: no code is emitted for this backend. dst, lhs and rhs are
// unused; the function only calls bailout() with reason kSimd and the message
// "pmin unimplemented".
void LiftoffAssembler::emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
bailout(kSimd, "pmin unimplemented");
|
||||
}
|
||||
|
||||
// f64x2.pmax stub: no code is emitted for this backend. dst, lhs and rhs are
// unused; the function only calls bailout() with reason kSimd and the message
// "pmax unimplemented".
void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
bailout(kSimd, "pmax unimplemented");
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
vdup(Neon32, liftoff::GetSimd128Register(dst), src.fp(), 0);
|
||||
@ -2452,6 +2462,16 @@ void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
liftoff::GetSimd128Register(rhs));
|
||||
}
|
||||
|
||||
// f32x4.pmin stub: no code is emitted for this backend. dst, lhs and rhs are
// unused; the function only calls bailout() with reason kSimd and the message
// "pmin unimplemented".
void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
bailout(kSimd, "pmin unimplemented");
|
||||
}
|
||||
|
||||
// f32x4.pmax stub: no code is emitted for this backend. dst, lhs and rhs are
// unused; the function only calls bailout() with reason kSimd and the message
// "pmax unimplemented".
void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
bailout(kSimd, "pmax unimplemented");
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
Simd128Register dst_simd = liftoff::GetSimd128Register(dst);
|
||||
|
@ -1568,6 +1568,16 @@ void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
Fmax(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
|
||||
}
|
||||
|
||||
// f64x2.pmin stub: no code is emitted for this backend. dst, lhs and rhs are
// unused; the function only calls bailout() with reason kSimd and the message
// "pmin unimplemented".
void LiftoffAssembler::emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
bailout(kSimd, "pmin unimplemented");
|
||||
}
|
||||
|
||||
// f64x2.pmax stub: no code is emitted for this backend. dst, lhs and rhs are
// unused; the function only calls bailout() with reason kSimd and the message
// "pmax unimplemented".
void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
bailout(kSimd, "pmax unimplemented");
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
Dup(dst.fp().V4S(), src.fp().S(), 0);
|
||||
@ -1634,6 +1644,16 @@ void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
Fmax(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
|
||||
}
|
||||
|
||||
// f32x4.pmin stub: no code is emitted for this backend. dst, lhs and rhs are
// unused; the function only calls bailout() with reason kSimd and the message
// "pmin unimplemented".
void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
bailout(kSimd, "pmin unimplemented");
|
||||
}
|
||||
|
||||
// f32x4.pmax stub: no code is emitted for this backend. dst, lhs and rhs are
// unused; the function only calls bailout() with reason kSimd and the message
// "pmax unimplemented".
void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
bailout(kSimd, "pmax unimplemented");
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
Dup(dst.fp().V2D(), src.gp().X());
|
||||
|
@ -3784,6 +3784,20 @@ void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
Andnps(dst.fp(), liftoff::kScratchDoubleReg);
|
||||
}
|
||||
|
||||
// Emits f32x4.pmin(lhs, rhs) into dst.
// The operands are handed to the helper in swapped order (rhs, lhs). Per the
// commit description, minps returns its second (source) operand when both
// inputs are zeros of either sign, whereas pmin has to return lhs in that case.
// The helper is instantiated with both vminps and minps (presumably the AVX
// and SSE encodings; which one is used is decided inside the helper).
void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
// Due to the way minps works, pmin(a, b) = minps(b, a).
|
||||
liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vminps, &Assembler::minps>(
|
||||
this, dst, rhs, lhs);
|
||||
}
|
||||
|
||||
// Emits f32x4.pmax(lhs, rhs) into dst.
// The operands are handed to the helper in swapped order (rhs, lhs). Per the
// commit description, maxps returns its second (source) operand when both
// inputs are zeros of either sign, whereas pmax has to return lhs in that case.
// The helper is instantiated with both vmaxps and maxps (presumably the AVX
// and SSE encodings; which one is used is decided inside the helper).
void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
// Due to the way maxps works, pmax(a, b) = maxps(b, a).
|
||||
liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vmaxps, &Assembler::maxps>(
|
||||
this, dst, rhs, lhs);
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
if (dst.fp() == src.fp()) {
|
||||
@ -3898,6 +3912,20 @@ void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
Andnpd(dst.fp(), liftoff::kScratchDoubleReg);
|
||||
}
|
||||
|
||||
// Emits f64x2.pmin(lhs, rhs) into dst.
// The operands are handed to the helper in swapped order (rhs, lhs). Per the
// commit description, minpd returns its second (source) operand when both
// inputs are zeros of either sign, whereas pmin has to return lhs in that case.
// The helper is instantiated with both vminpd and minpd (presumably the AVX
// and SSE encodings; which one is used is decided inside the helper).
void LiftoffAssembler::emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
// Due to the way minpd works, pmin(a, b) = minpd(b, a).
|
||||
liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vminpd, &Assembler::minpd>(
|
||||
this, dst, rhs, lhs);
|
||||
}
|
||||
|
||||
// Emits f64x2.pmax(lhs, rhs) into dst.
// The operands are handed to the helper in swapped order (rhs, lhs). Per the
// commit description, maxpd returns its second (source) operand when both
// inputs are zeros of either sign, whereas pmax has to return lhs in that case.
// The helper is instantiated with both vmaxpd and maxpd (presumably the AVX
// and SSE encodings; which one is used is decided inside the helper).
void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
// Due to the way maxpd works, pmax(a, b) = maxpd(b, a).
|
||||
liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vmaxpd, &Assembler::maxpd>(
|
||||
this, dst, rhs, lhs);
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
// NAN->0
|
||||
|
@ -980,6 +980,10 @@ class LiftoffAssembler : public TurboAssembler {
|
||||
LiftoffRegister rhs);
|
||||
inline void emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs);
|
||||
// Emitter for wasm f32x4.pmin: dst receives pmin(lhs, rhs). Defined per
// architecture; some definitions only bail out as unimplemented.
inline void emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs);
|
||||
// Emitter for wasm f32x4.pmax: dst receives pmax(lhs, rhs). Defined per
// architecture; some definitions only bail out as unimplemented.
inline void emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs);
|
||||
inline void emit_f64x2_abs(LiftoffRegister dst, LiftoffRegister src);
|
||||
inline void emit_f64x2_neg(LiftoffRegister dst, LiftoffRegister src);
|
||||
inline void emit_f64x2_sqrt(LiftoffRegister dst, LiftoffRegister src);
|
||||
@ -995,6 +999,10 @@ class LiftoffAssembler : public TurboAssembler {
|
||||
LiftoffRegister rhs);
|
||||
inline void emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs);
|
||||
// Emitter for wasm f64x2.pmin: dst receives pmin(lhs, rhs). Defined per
// architecture; some definitions only bail out as unimplemented.
inline void emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs);
|
||||
// Emitter for wasm f64x2.pmax: dst receives pmax(lhs, rhs). Defined per
// architecture; some definitions only bail out as unimplemented.
inline void emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs);
|
||||
inline void emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
|
||||
LiftoffRegister src);
|
||||
inline void emit_i32x4_uconvert_f32x4(LiftoffRegister dst,
|
||||
|
@ -2732,6 +2732,10 @@ class LiftoffCompiler {
|
||||
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f32x4_min);
|
||||
case wasm::kExprF32x4Max:
|
||||
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f32x4_max);
|
||||
case wasm::kExprF32x4Pmin:
|
||||
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f32x4_pmin);
|
||||
case wasm::kExprF32x4Pmax:
|
||||
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f32x4_pmax);
|
||||
case wasm::kExprF64x2Abs:
|
||||
return EmitUnOp<kS128, kS128>(&LiftoffAssembler::emit_f64x2_abs);
|
||||
case wasm::kExprF64x2Neg:
|
||||
@ -2750,6 +2754,10 @@ class LiftoffCompiler {
|
||||
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f64x2_min);
|
||||
case wasm::kExprF64x2Max:
|
||||
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f64x2_max);
|
||||
case wasm::kExprF64x2Pmin:
|
||||
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f64x2_pmin);
|
||||
case wasm::kExprF64x2Pmax:
|
||||
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f64x2_pmax);
|
||||
case wasm::kExprI32x4SConvertF32x4:
|
||||
return EmitUnOp<kS128, kS128>(
|
||||
&LiftoffAssembler::emit_i32x4_sconvert_f32x4);
|
||||
|
@ -3376,6 +3376,20 @@ void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
Andnps(dst.fp(), kScratchDoubleReg);
|
||||
}
|
||||
|
||||
// Emits f32x4.pmin(lhs, rhs) into dst.
// The operands are handed to the helper in swapped order (rhs, lhs). Per the
// commit description, minps returns its second (source) operand when both
// inputs are zeros of either sign, whereas pmin has to return lhs in that case.
// The helper is instantiated with both vminps and minps (presumably the AVX
// and SSE encodings; which one is used is decided inside the helper).
void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
// Due to the way minps works, pmin(a, b) = minps(b, a).
|
||||
liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vminps, &Assembler::minps>(
|
||||
this, dst, rhs, lhs);
|
||||
}
|
||||
|
||||
// Emits f32x4.pmax(lhs, rhs) into dst.
// The operands are handed to the helper in swapped order (rhs, lhs). Per the
// commit description, maxps returns its second (source) operand when both
// inputs are zeros of either sign, whereas pmax has to return lhs in that case.
// The helper is instantiated with both vmaxps and maxps (presumably the AVX
// and SSE encodings; which one is used is decided inside the helper).
void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
// Due to the way maxps works, pmax(a, b) = maxps(b, a).
|
||||
liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vmaxps, &Assembler::maxps>(
|
||||
this, dst, rhs, lhs);
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
if (dst.fp() == src.fp()) {
|
||||
@ -3490,6 +3504,20 @@ void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
Andnpd(dst.fp(), kScratchDoubleReg);
|
||||
}
|
||||
|
||||
// Emits f64x2.pmin(lhs, rhs) into dst.
// The operands are handed to the helper in swapped order (rhs, lhs). Per the
// commit description, minpd returns its second (source) operand when both
// inputs are zeros of either sign, whereas pmin has to return lhs in that case.
// The helper is instantiated with both vminpd and minpd (presumably the AVX
// and SSE encodings; which one is used is decided inside the helper).
void LiftoffAssembler::emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
// Due to the way minpd works, pmin(a, b) = minpd(b, a).
|
||||
liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vminpd, &Assembler::minpd>(
|
||||
this, dst, rhs, lhs);
|
||||
}
|
||||
|
||||
// Emits f64x2.pmax(lhs, rhs) into dst.
// The operands are handed to the helper in swapped order (rhs, lhs). Per the
// commit description, maxpd returns its second (source) operand when both
// inputs are zeros of either sign, whereas pmax has to return lhs in that case.
// The helper is instantiated with both vmaxpd and maxpd (presumably the AVX
// and SSE encodings; which one is used is decided inside the helper).
void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
LiftoffRegister rhs) {
|
||||
// Due to the way maxpd works, pmax(a, b) = maxpd(b, a).
|
||||
liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vmaxpd, &Assembler::maxpd>(
|
||||
this, dst, rhs, lhs);
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
// NAN->0
|
||||
|
Loading…
Reference in New Issue
Block a user