[wasm-simd][liftoff][ia32][x64] Implement floating-point roundings

Implement f32x4 and f64x2 nearest, trunc, ceil, and floor for ia32 and
x64. arm and arm64 will follow in a future patch; for now they bail out
into a runtime call.

Bug: v8:10906
Change-Id: I8c90ba6825e0360ca1251a1f706f9dbcba7afdbf
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2411691
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#69961}
commit 7f65469318
parent 5f7e9234d4
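For reference, a minimal scalar sketch (illustration only, not code from this change) of the per-lane semantics the four new operations implement: ceil, floor, and trunc are the usual IEEE 754 directed roundings, and nearest means round-to-nearest with ties to even, which is what roundps/roundpd produce for kRoundToNearest and what std::nearbyint gives under the default rounding mode (std::round, which breaks ties away from zero, would be wrong here). The function names below are illustrative, not V8's.

#include <cmath>

// Per-lane reference semantics; the SIMD instructions apply the same
// rounding to each of the 4 (f32x4) or 2 (f64x2) lanes.
float lane_ceil(float x) { return std::ceil(x); }
float lane_floor(float x) { return std::floor(x); }
float lane_trunc(float x) { return std::trunc(x); }
float lane_nearest(float x) {
  // Ties to even: 2.5f -> 2.0f, 3.5f -> 4.0f.
  return std::nearbyint(x);
}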
@@ -2316,6 +2316,30 @@ void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst,
  vsqrt(dst.high_fp(), src.high_fp());
}

bool LiftoffAssembler::emit_f64x2_ceil(LiftoffRegister dst,
                                       LiftoffRegister src) {
  bailout(kSimd, "f64x2.ceil");
  return true;
}

bool LiftoffAssembler::emit_f64x2_floor(LiftoffRegister dst,
                                        LiftoffRegister src) {
  bailout(kSimd, "f64x2.floor");
  return true;
}

bool LiftoffAssembler::emit_f64x2_trunc(LiftoffRegister dst,
                                        LiftoffRegister src) {
  bailout(kSimd, "f64x2.trunc");
  return true;
}

bool LiftoffAssembler::emit_f64x2_nearest_int(LiftoffRegister dst,
                                              LiftoffRegister src) {
  bailout(kSimd, "f64x2.nearest_int");
  return true;
}

void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  vadd(dst.low_fp(), lhs.low_fp(), rhs.low_fp());

@@ -2421,6 +2445,30 @@ void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst,
  vsqrt(dst_high.high(), src_high.high());
}

bool LiftoffAssembler::emit_f32x4_ceil(LiftoffRegister dst,
                                       LiftoffRegister src) {
  bailout(kSimd, "f32x4.ceil");
  return true;
}

bool LiftoffAssembler::emit_f32x4_floor(LiftoffRegister dst,
                                        LiftoffRegister src) {
  bailout(kSimd, "f32x4.floor");
  return true;
}

bool LiftoffAssembler::emit_f32x4_trunc(LiftoffRegister dst,
                                        LiftoffRegister src) {
  bailout(kSimd, "f32x4.trunc");
  return true;
}

bool LiftoffAssembler::emit_f32x4_nearest_int(LiftoffRegister dst,
                                              LiftoffRegister src) {
  bailout(kSimd, "f32x4.nearest_int");
  return true;
}

void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  vadd(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),

@@ -3600,6 +3648,11 @@ void LiftoffAssembler::CallC(const wasm::FunctionSig* sig,
      case ValueType::kF64:
        vstr(args->fp(), MemOperand(sp, arg_bytes));
        break;
      case ValueType::kS128:
        vstr(args->low_fp(), MemOperand(sp, arg_bytes));
        vstr(args->high_fp(),
             MemOperand(sp, arg_bytes + 2 * kSystemPointerSize));
        break;
      default:
        UNREACHABLE();
    }

@@ -3643,6 +3696,10 @@ void LiftoffAssembler::CallC(const wasm::FunctionSig* sig,
      case ValueType::kF64:
        vldr(result_reg->fp(), MemOperand(sp));
        break;
      case ValueType::kS128:
        vld1(Neon8, NeonListOperand(result_reg->low_fp(), 2),
             NeonMemOperand(sp));
        break;
      default:
        UNREACHABLE();
    }

@@ -1545,6 +1545,30 @@ void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst,
  Fsqrt(dst.fp().V2D(), src.fp().V2D());
}

bool LiftoffAssembler::emit_f64x2_ceil(LiftoffRegister dst,
                                       LiftoffRegister src) {
  bailout(kSimd, "f64x2.ceil");
  return true;
}

bool LiftoffAssembler::emit_f64x2_floor(LiftoffRegister dst,
                                        LiftoffRegister src) {
  bailout(kSimd, "f64x2.floor");
  return true;
}

bool LiftoffAssembler::emit_f64x2_trunc(LiftoffRegister dst,
                                        LiftoffRegister src) {
  bailout(kSimd, "f64x2.trunc");
  return true;
}

bool LiftoffAssembler::emit_f64x2_nearest_int(LiftoffRegister dst,
                                              LiftoffRegister src) {
  bailout(kSimd, "f64x2.nearest_int");
  return true;
}

void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Fadd(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());

@@ -1621,6 +1645,30 @@ void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst,
  Fsqrt(dst.fp().V4S(), src.fp().V4S());
}

bool LiftoffAssembler::emit_f32x4_ceil(LiftoffRegister dst,
                                       LiftoffRegister src) {
  bailout(kSimd, "f32x4.ceil");
  return true;
}

bool LiftoffAssembler::emit_f32x4_floor(LiftoffRegister dst,
                                        LiftoffRegister src) {
  bailout(kSimd, "f32x4.floor");
  return true;
}

bool LiftoffAssembler::emit_f32x4_trunc(LiftoffRegister dst,
                                        LiftoffRegister src) {
  bailout(kSimd, "f32x4.trunc");
  return true;
}

bool LiftoffAssembler::emit_f32x4_nearest_int(LiftoffRegister dst,
                                              LiftoffRegister src) {
  bailout(kSimd, "f32x4.nearest_int");
  return true;
}

void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Fadd(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());

@@ -3730,6 +3730,34 @@ void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst,
  Sqrtps(dst.fp(), src.fp());
}

bool LiftoffAssembler::emit_f32x4_ceil(LiftoffRegister dst,
                                       LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundps(dst.fp(), src.fp(), kRoundUp);
  return true;
}

bool LiftoffAssembler::emit_f32x4_floor(LiftoffRegister dst,
                                        LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundps(dst.fp(), src.fp(), kRoundDown);
  return true;
}

bool LiftoffAssembler::emit_f32x4_trunc(LiftoffRegister dst,
                                        LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundps(dst.fp(), src.fp(), kRoundToZero);
  return true;
}

bool LiftoffAssembler::emit_f32x4_nearest_int(LiftoffRegister dst,
                                              LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundps(dst.fp(), src.fp(), kRoundToNearest);
  return true;
}

void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddps, &Assembler::addps>(

@@ -3858,6 +3886,34 @@ void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst,
  Sqrtpd(dst.fp(), src.fp());
}

bool LiftoffAssembler::emit_f64x2_ceil(LiftoffRegister dst,
                                       LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundpd(dst.fp(), src.fp(), kRoundUp);
  return true;
}

bool LiftoffAssembler::emit_f64x2_floor(LiftoffRegister dst,
                                        LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundpd(dst.fp(), src.fp(), kRoundDown);
  return true;
}

bool LiftoffAssembler::emit_f64x2_trunc(LiftoffRegister dst,
                                        LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundpd(dst.fp(), src.fp(), kRoundToZero);
  return true;
}

bool LiftoffAssembler::emit_f64x2_nearest_int(LiftoffRegister dst,
                                              LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundpd(dst.fp(), src.fp(), kRoundToNearest);
  return true;
}

void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddpd, &Assembler::addpd>(

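The Roundps / Roundpd macro-assembler calls above emit the SSE4.1 roundps/roundpd instructions. Purely for illustration (this is not part of the change), the same four rounding modes expressed with compiler intrinsics, assuming SSE4.1 is available:

#include <immintrin.h>

// kRoundUp / kRoundDown / kRoundToZero / kRoundToNearest correspond to
// these immediate rounding-control values.
__m128 f32x4_ceil(__m128 v) {
  return _mm_round_ps(v, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
}
__m128 f32x4_floor(__m128 v) {
  return _mm_round_ps(v, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
}
__m128d f64x2_trunc(__m128d v) {
  return _mm_round_pd(v, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}
__m128d f64x2_nearest(__m128d v) {
  return _mm_round_pd(v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}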
@@ -970,6 +970,10 @@ class LiftoffAssembler : public TurboAssembler {
  inline void emit_f32x4_abs(LiftoffRegister dst, LiftoffRegister src);
  inline void emit_f32x4_neg(LiftoffRegister dst, LiftoffRegister src);
  inline void emit_f32x4_sqrt(LiftoffRegister dst, LiftoffRegister src);
  inline bool emit_f32x4_ceil(LiftoffRegister dst, LiftoffRegister src);
  inline bool emit_f32x4_floor(LiftoffRegister dst, LiftoffRegister src);
  inline bool emit_f32x4_trunc(LiftoffRegister dst, LiftoffRegister src);
  inline bool emit_f32x4_nearest_int(LiftoffRegister dst, LiftoffRegister src);
  inline void emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
  inline void emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,

@@ -989,6 +993,10 @@ class LiftoffAssembler : public TurboAssembler {
  inline void emit_f64x2_abs(LiftoffRegister dst, LiftoffRegister src);
  inline void emit_f64x2_neg(LiftoffRegister dst, LiftoffRegister src);
  inline void emit_f64x2_sqrt(LiftoffRegister dst, LiftoffRegister src);
  inline bool emit_f64x2_ceil(LiftoffRegister dst, LiftoffRegister src);
  inline bool emit_f64x2_floor(LiftoffRegister dst, LiftoffRegister src);
  inline bool emit_f64x2_trunc(LiftoffRegister dst, LiftoffRegister src);
  inline bool emit_f64x2_nearest_int(LiftoffRegister dst, LiftoffRegister src);
  inline void emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
  inline void emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,

@@ -7,6 +7,7 @@
#include "src/base/optional.h"
#include "src/codegen/assembler-inl.h"
// TODO(clemensb): Remove dependences on compiler stuff.
#include "src/codegen/external-reference.h"
#include "src/codegen/interface-descriptors.h"
#include "src/codegen/machine-type.h"
#include "src/codegen/macro-assembler-inl.h"

@@ -2498,6 +2499,21 @@ class LiftoffCompiler {
    }
  }

  void EmitSimdFloatRoundingOpWithCFallback(
      bool (LiftoffAssembler::*emit_fn)(LiftoffRegister, LiftoffRegister),
      ExternalReference (*ext_ref)()) {
    static constexpr RegClass rc = reg_class_for(kWasmS128);
    LiftoffRegister src = __ PopToRegister();
    LiftoffRegister dst = __ GetUnusedRegister(rc, {src}, {});
    if (!(asm_.*emit_fn)(dst, src)) {
      // Return v128 via stack for ARM.
      ValueType sig_v_s_reps[] = {kWasmS128};
      FunctionSig sig_v_s(0, 1, sig_v_s_reps);
      GenerateCCall(&dst, &sig_v_s, kWasmS128, &src, ext_ref());
    }
    __ PushRegister(kWasmS128, dst);
  }

  void SimdOp(FullDecoder* decoder, WasmOpcode opcode, Vector<Value> args,
              Value* result) {
    if (!CpuFeatures::SupportsWasmSimd128()) {

@@ -2764,6 +2780,22 @@ class LiftoffCompiler {
        return EmitUnOp<kS128, kS128>(&LiftoffAssembler::emit_f32x4_neg);
      case wasm::kExprF32x4Sqrt:
        return EmitUnOp<kS128, kS128>(&LiftoffAssembler::emit_f32x4_sqrt);
      case wasm::kExprF32x4Ceil:
        return EmitSimdFloatRoundingOpWithCFallback(
            &LiftoffAssembler::emit_f32x4_ceil,
            &ExternalReference::wasm_f32x4_ceil);
      case wasm::kExprF32x4Floor:
        return EmitSimdFloatRoundingOpWithCFallback(
            &LiftoffAssembler::emit_f32x4_floor,
            ExternalReference::wasm_f32x4_floor);
      case wasm::kExprF32x4Trunc:
        return EmitSimdFloatRoundingOpWithCFallback(
            &LiftoffAssembler::emit_f32x4_trunc,
            ExternalReference::wasm_f32x4_trunc);
      case wasm::kExprF32x4NearestInt:
        return EmitSimdFloatRoundingOpWithCFallback(
            &LiftoffAssembler::emit_f32x4_nearest_int,
            ExternalReference::wasm_f32x4_nearest_int);
      case wasm::kExprF32x4Add:
        return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f32x4_add);
      case wasm::kExprF32x4Sub:

@@ -2786,6 +2818,22 @@ class LiftoffCompiler {
        return EmitUnOp<kS128, kS128>(&LiftoffAssembler::emit_f64x2_neg);
      case wasm::kExprF64x2Sqrt:
        return EmitUnOp<kS128, kS128>(&LiftoffAssembler::emit_f64x2_sqrt);
      case wasm::kExprF64x2Ceil:
        return EmitSimdFloatRoundingOpWithCFallback(
            &LiftoffAssembler::emit_f64x2_ceil,
            &ExternalReference::wasm_f64x2_ceil);
      case wasm::kExprF64x2Floor:
        return EmitSimdFloatRoundingOpWithCFallback(
            &LiftoffAssembler::emit_f64x2_floor,
            ExternalReference::wasm_f64x2_floor);
      case wasm::kExprF64x2Trunc:
        return EmitSimdFloatRoundingOpWithCFallback(
            &LiftoffAssembler::emit_f64x2_trunc,
            ExternalReference::wasm_f64x2_trunc);
      case wasm::kExprF64x2NearestInt:
        return EmitSimdFloatRoundingOpWithCFallback(
            &LiftoffAssembler::emit_f64x2_nearest_int,
            ExternalReference::wasm_f64x2_nearest_int);
      case wasm::kExprF64x2Add:
        return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f64x2_add);
      case wasm::kExprF64x2Sub:

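When the architecture-specific emit_fn returns false, GenerateCCall above routes the s128 value to a runtime function through memory (hence the "Return v128 via stack for ARM" comment). As a rough sketch only, assuming an in-place, pointer-based convention, such a fallback could look like the following; the actual wrappers behind ExternalReference::wasm_f64x2_* are not shown in this diff and may differ:

#include <cmath>
#include <cstdint>
#include <cstring>

// Hypothetical helper: round both f64 lanes of a 16-byte value up,
// writing the result back into the same buffer.
void f64x2_ceil_fallback(uint8_t* data) {
  double lanes[2];
  std::memcpy(lanes, data, sizeof lanes);
  lanes[0] = std::ceil(lanes[0]);
  lanes[1] = std::ceil(lanes[1]);
  std::memcpy(data, lanes, sizeof lanes);
}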
@@ -3324,6 +3324,34 @@ void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst,
  Sqrtps(dst.fp(), src.fp());
}

bool LiftoffAssembler::emit_f32x4_ceil(LiftoffRegister dst,
                                       LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundps(dst.fp(), src.fp(), kRoundUp);
  return true;
}

bool LiftoffAssembler::emit_f32x4_floor(LiftoffRegister dst,
                                        LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundps(dst.fp(), src.fp(), kRoundDown);
  return true;
}

bool LiftoffAssembler::emit_f32x4_trunc(LiftoffRegister dst,
                                        LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundps(dst.fp(), src.fp(), kRoundToZero);
  return true;
}

bool LiftoffAssembler::emit_f32x4_nearest_int(LiftoffRegister dst,
                                              LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundps(dst.fp(), src.fp(), kRoundToNearest);
  return true;
}

void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddps, &Assembler::addps>(

@@ -3452,6 +3480,34 @@ void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst,
  Sqrtpd(dst.fp(), src.fp());
}

bool LiftoffAssembler::emit_f64x2_ceil(LiftoffRegister dst,
                                       LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundpd(dst.fp(), src.fp(), kRoundUp);
  return true;
}

bool LiftoffAssembler::emit_f64x2_floor(LiftoffRegister dst,
                                        LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundpd(dst.fp(), src.fp(), kRoundDown);
  return true;
}

bool LiftoffAssembler::emit_f64x2_trunc(LiftoffRegister dst,
                                        LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundpd(dst.fp(), src.fp(), kRoundToZero);
  return true;
}

bool LiftoffAssembler::emit_f64x2_nearest_int(LiftoffRegister dst,
                                              LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundpd(dst.fp(), src.fp(), kRoundToNearest);
  return true;
}

void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddpd, &Assembler::addpd>(