[wasm-simd][liftoff][ia32][x64] Implement pmin pmax

Implement f32x4.pmin, f32x4.pmax, f64x2.pmin, and f64x2.pmax.

Due to the way minps/maxps/minpd/maxpd works [0], we swap lhs and rhs.
This is similar to the way TurboFan does this [1].

[0] "If the values being compared are both 0.0s (of either sign), the
value in the second operand (source operand) is returned." but pmin/pmax
wants to return the lhs (which follows the definition of std::min<T> in
C++ STL.)

[1]
https://source.chromium.org/search?q=instruction-selector-ia32.cc%20VisitPmin&ss=chromium%2Fchromium%2Fsrc:v8%2F

Bug: v8:10904
Change-Id: Ie58cae66cd48421c3ab40df33df979b0353b01ee
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2406593
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#69889}
This commit is contained in:
Ng Zhi An 2020-09-11 11:29:56 -07:00 committed by Commit Bot
parent 383c4a44cd
commit 863c2cb4eb
6 changed files with 112 additions and 0 deletions

View File

@ -2357,6 +2357,16 @@ void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
liftoff::MinOrMax::kMax); liftoff::MinOrMax::kMax);
} }
void LiftoffAssembler::emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "pmin unimplemented");
}
void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "pmax unimplemented");
}
void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst, void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
vdup(Neon32, liftoff::GetSimd128Register(dst), src.fp(), 0); vdup(Neon32, liftoff::GetSimd128Register(dst), src.fp(), 0);
@ -2452,6 +2462,16 @@ void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
liftoff::GetSimd128Register(rhs)); liftoff::GetSimd128Register(rhs));
} }
void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "pmin unimplemented");
}
void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "pmax unimplemented");
}
void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst, void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
Simd128Register dst_simd = liftoff::GetSimd128Register(dst); Simd128Register dst_simd = liftoff::GetSimd128Register(dst);

View File

@ -1568,6 +1568,16 @@ void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
Fmax(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D()); Fmax(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
} }
void LiftoffAssembler::emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "pmin unimplemented");
}
void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "pmax unimplemented");
}
void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst, void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
Dup(dst.fp().V4S(), src.fp().S(), 0); Dup(dst.fp().V4S(), src.fp().S(), 0);
@ -1634,6 +1644,16 @@ void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
Fmax(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S()); Fmax(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
} }
void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "pmin unimplemented");
}
void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "pmax unimplemented");
}
void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst, void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
Dup(dst.fp().V2D(), src.gp().X()); Dup(dst.fp().V2D(), src.gp().X());

View File

@ -3784,6 +3784,20 @@ void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
Andnps(dst.fp(), liftoff::kScratchDoubleReg); Andnps(dst.fp(), liftoff::kScratchDoubleReg);
} }
void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
// Due to the way minps works, pmin(a, b) = minps(b, a).
liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vminps, &Assembler::minps>(
this, dst, rhs, lhs);
}
void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
// Due to the way maxps works, pmax(a, b) = maxps(b, a).
liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vmaxps, &Assembler::maxps>(
this, dst, rhs, lhs);
}
void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst, void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
if (dst.fp() == src.fp()) { if (dst.fp() == src.fp()) {
@ -3898,6 +3912,20 @@ void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
Andnpd(dst.fp(), liftoff::kScratchDoubleReg); Andnpd(dst.fp(), liftoff::kScratchDoubleReg);
} }
void LiftoffAssembler::emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
// Due to the way minpd works, pmin(a, b) = minpd(b, a).
liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vminpd, &Assembler::minpd>(
this, dst, rhs, lhs);
}
void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
// Due to the way maxpd works, pmax(a, b) = maxpd(b, a).
liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vmaxpd, &Assembler::maxpd>(
this, dst, rhs, lhs);
}
void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst, void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
// NAN->0 // NAN->0

View File

@ -980,6 +980,10 @@ class LiftoffAssembler : public TurboAssembler {
LiftoffRegister rhs); LiftoffRegister rhs);
inline void emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs, inline void emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs); LiftoffRegister rhs);
inline void emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_f64x2_abs(LiftoffRegister dst, LiftoffRegister src); inline void emit_f64x2_abs(LiftoffRegister dst, LiftoffRegister src);
inline void emit_f64x2_neg(LiftoffRegister dst, LiftoffRegister src); inline void emit_f64x2_neg(LiftoffRegister dst, LiftoffRegister src);
inline void emit_f64x2_sqrt(LiftoffRegister dst, LiftoffRegister src); inline void emit_f64x2_sqrt(LiftoffRegister dst, LiftoffRegister src);
@ -995,6 +999,10 @@ class LiftoffAssembler : public TurboAssembler {
LiftoffRegister rhs); LiftoffRegister rhs);
inline void emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs, inline void emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs); LiftoffRegister rhs);
inline void emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_i32x4_sconvert_f32x4(LiftoffRegister dst, inline void emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
LiftoffRegister src); LiftoffRegister src);
inline void emit_i32x4_uconvert_f32x4(LiftoffRegister dst, inline void emit_i32x4_uconvert_f32x4(LiftoffRegister dst,

View File

@ -2732,6 +2732,10 @@ class LiftoffCompiler {
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f32x4_min); return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f32x4_min);
case wasm::kExprF32x4Max: case wasm::kExprF32x4Max:
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f32x4_max); return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f32x4_max);
case wasm::kExprF32x4Pmin:
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f32x4_pmin);
case wasm::kExprF32x4Pmax:
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f32x4_pmax);
case wasm::kExprF64x2Abs: case wasm::kExprF64x2Abs:
return EmitUnOp<kS128, kS128>(&LiftoffAssembler::emit_f64x2_abs); return EmitUnOp<kS128, kS128>(&LiftoffAssembler::emit_f64x2_abs);
case wasm::kExprF64x2Neg: case wasm::kExprF64x2Neg:
@ -2750,6 +2754,10 @@ class LiftoffCompiler {
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f64x2_min); return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f64x2_min);
case wasm::kExprF64x2Max: case wasm::kExprF64x2Max:
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f64x2_max); return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f64x2_max);
case wasm::kExprF64x2Pmin:
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f64x2_pmin);
case wasm::kExprF64x2Pmax:
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f64x2_pmax);
case wasm::kExprI32x4SConvertF32x4: case wasm::kExprI32x4SConvertF32x4:
return EmitUnOp<kS128, kS128>( return EmitUnOp<kS128, kS128>(
&LiftoffAssembler::emit_i32x4_sconvert_f32x4); &LiftoffAssembler::emit_i32x4_sconvert_f32x4);

View File

@ -3376,6 +3376,20 @@ void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
Andnps(dst.fp(), kScratchDoubleReg); Andnps(dst.fp(), kScratchDoubleReg);
} }
void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
// Due to the way minps works, pmin(a, b) = minps(b, a).
liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vminps, &Assembler::minps>(
this, dst, rhs, lhs);
}
void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
// Due to the way maxps works, pmax(a, b) = maxps(b, a).
liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vmaxps, &Assembler::maxps>(
this, dst, rhs, lhs);
}
void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst, void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
if (dst.fp() == src.fp()) { if (dst.fp() == src.fp()) {
@ -3490,6 +3504,20 @@ void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
Andnpd(dst.fp(), kScratchDoubleReg); Andnpd(dst.fp(), kScratchDoubleReg);
} }
void LiftoffAssembler::emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
// Due to the way minpd works, pmin(a, b) = minpd(b, a).
liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vminpd, &Assembler::minpd>(
this, dst, rhs, lhs);
}
void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
// Due to the way maxpd works, pmax(a, b) = maxpd(b, a).
liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vmaxpd, &Assembler::maxpd>(
this, dst, rhs, lhs);
}
void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst, void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
// NAN->0 // NAN->0