[wasm-simd][liftoff][ia32][x64] Implement floating-point roundings

Implement f32x4 and f64x2 nearest, trunc, ceil, and floor for ia32 and
x64. arm and arm64 will follow in a future patch; for now they just bail
out into a runtime call.

Bug: v8:10906
Change-Id: I8c90ba6825e0360ca1251a1f706f9dbcba7afdbf
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2411691
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#69961}
This commit is contained in:
Ng Zhi An 2020-09-16 14:18:06 -07:00 committed by Commit Bot
parent 5f7e9234d4
commit 7f65469318
6 changed files with 273 additions and 0 deletions

View File

@ -2316,6 +2316,30 @@ void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst,
vsqrt(dst.high_fp(), src.high_fp());
}
// f64x2.ceil has no native implementation here yet: flags a kSimd bailout
// rather than emitting code for it.
bool LiftoffAssembler::emit_f64x2_ceil(LiftoffRegister dst,
                                       LiftoffRegister src) {
  // Report success so that the caller, which emits its C-call fallback only
  // when an emitter returns false, does not generate that call as well.
  constexpr bool kSkipCFallback = true;
  bailout(kSimd, "f64x2.ceil");
  return kSkipCFallback;
}
// f64x2.floor has no native implementation here yet: flags a kSimd bailout
// rather than emitting code for it.
bool LiftoffAssembler::emit_f64x2_floor(LiftoffRegister dst,
                                        LiftoffRegister src) {
  // Report success so that the caller, which emits its C-call fallback only
  // when an emitter returns false, does not generate that call as well.
  constexpr bool kSkipCFallback = true;
  bailout(kSimd, "f64x2.floor");
  return kSkipCFallback;
}
// f64x2.trunc has no native implementation here yet: flags a kSimd bailout
// rather than emitting code for it.
bool LiftoffAssembler::emit_f64x2_trunc(LiftoffRegister dst,
                                        LiftoffRegister src) {
  // Report success so that the caller, which emits its C-call fallback only
  // when an emitter returns false, does not generate that call as well.
  constexpr bool kSkipCFallback = true;
  bailout(kSimd, "f64x2.trunc");
  return kSkipCFallback;
}
// f64x2.nearest_int has no native implementation here yet: flags a kSimd
// bailout rather than emitting code for it.
bool LiftoffAssembler::emit_f64x2_nearest_int(LiftoffRegister dst,
                                              LiftoffRegister src) {
  // Report success so that the caller, which emits its C-call fallback only
  // when an emitter returns false, does not generate that call as well.
  constexpr bool kSkipCFallback = true;
  bailout(kSimd, "f64x2.nearest_int");
  return kSkipCFallback;
}
void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
vadd(dst.low_fp(), lhs.low_fp(), rhs.low_fp());
@ -2421,6 +2445,30 @@ void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst,
vsqrt(dst_high.high(), src_high.high());
}
// f32x4.ceil has no native implementation here yet: flags a kSimd bailout
// rather than emitting code for it.
bool LiftoffAssembler::emit_f32x4_ceil(LiftoffRegister dst,
                                       LiftoffRegister src) {
  // Report success so that the caller, which emits its C-call fallback only
  // when an emitter returns false, does not generate that call as well.
  constexpr bool kSkipCFallback = true;
  bailout(kSimd, "f32x4.ceil");
  return kSkipCFallback;
}
// f32x4.floor has no native implementation here yet: flags a kSimd bailout
// rather than emitting code for it.
bool LiftoffAssembler::emit_f32x4_floor(LiftoffRegister dst,
                                        LiftoffRegister src) {
  // Report success so that the caller, which emits its C-call fallback only
  // when an emitter returns false, does not generate that call as well.
  constexpr bool kSkipCFallback = true;
  bailout(kSimd, "f32x4.floor");
  return kSkipCFallback;
}
// f32x4.trunc has no native implementation here yet: flags a kSimd bailout
// rather than emitting code for it.
bool LiftoffAssembler::emit_f32x4_trunc(LiftoffRegister dst,
                                        LiftoffRegister src) {
  // Report success so that the caller, which emits its C-call fallback only
  // when an emitter returns false, does not generate that call as well.
  constexpr bool kSkipCFallback = true;
  bailout(kSimd, "f32x4.trunc");
  return kSkipCFallback;
}
// f32x4.nearest_int has no native implementation here yet: flags a kSimd
// bailout rather than emitting code for it.
bool LiftoffAssembler::emit_f32x4_nearest_int(LiftoffRegister dst,
                                              LiftoffRegister src) {
  // Report success so that the caller, which emits its C-call fallback only
  // when an emitter returns false, does not generate that call as well.
  constexpr bool kSkipCFallback = true;
  bailout(kSimd, "f32x4.nearest_int");
  return kSkipCFallback;
}
void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
vadd(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
@ -3600,6 +3648,11 @@ void LiftoffAssembler::CallC(const wasm::FunctionSig* sig,
case ValueType::kF64:
vstr(args->fp(), MemOperand(sp, arg_bytes));
break;
case ValueType::kS128:
vstr(args->low_fp(), MemOperand(sp, arg_bytes));
vstr(args->high_fp(),
MemOperand(sp, arg_bytes + 2 * kSystemPointerSize));
break;
default:
UNREACHABLE();
}
@ -3643,6 +3696,10 @@ void LiftoffAssembler::CallC(const wasm::FunctionSig* sig,
case ValueType::kF64:
vldr(result_reg->fp(), MemOperand(sp));
break;
case ValueType::kS128:
vld1(Neon8, NeonListOperand(result_reg->low_fp(), 2),
NeonMemOperand(sp));
break;
default:
UNREACHABLE();
}

View File

@ -1545,6 +1545,30 @@ void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst,
Fsqrt(dst.fp().V2D(), src.fp().V2D());
}
// Placeholder for f64x2.ceil: no instructions are emitted; a kSimd bailout
// is flagged instead.
bool LiftoffAssembler::emit_f64x2_ceil(LiftoffRegister dst,
                                       LiftoffRegister src) {
  // A true result tells the caller not to emit its C-call fallback, which it
  // generates only when an emitter returns false.
  constexpr bool kNoCFallbackNeeded = true;
  bailout(kSimd, "f64x2.ceil");
  return kNoCFallbackNeeded;
}
// Placeholder for f64x2.floor: no instructions are emitted; a kSimd bailout
// is flagged instead.
bool LiftoffAssembler::emit_f64x2_floor(LiftoffRegister dst,
                                        LiftoffRegister src) {
  // A true result tells the caller not to emit its C-call fallback, which it
  // generates only when an emitter returns false.
  constexpr bool kNoCFallbackNeeded = true;
  bailout(kSimd, "f64x2.floor");
  return kNoCFallbackNeeded;
}
// Placeholder for f64x2.trunc: no instructions are emitted; a kSimd bailout
// is flagged instead.
bool LiftoffAssembler::emit_f64x2_trunc(LiftoffRegister dst,
                                        LiftoffRegister src) {
  // A true result tells the caller not to emit its C-call fallback, which it
  // generates only when an emitter returns false.
  constexpr bool kNoCFallbackNeeded = true;
  bailout(kSimd, "f64x2.trunc");
  return kNoCFallbackNeeded;
}
// Placeholder for f64x2.nearest_int: no instructions are emitted; a kSimd
// bailout is flagged instead.
bool LiftoffAssembler::emit_f64x2_nearest_int(LiftoffRegister dst,
                                              LiftoffRegister src) {
  // A true result tells the caller not to emit its C-call fallback, which it
  // generates only when an emitter returns false.
  constexpr bool kNoCFallbackNeeded = true;
  bailout(kSimd, "f64x2.nearest_int");
  return kNoCFallbackNeeded;
}
void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Fadd(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
@ -1621,6 +1645,30 @@ void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst,
Fsqrt(dst.fp().V4S(), src.fp().V4S());
}
// Placeholder for f32x4.ceil: no instructions are emitted; a kSimd bailout
// is flagged instead.
bool LiftoffAssembler::emit_f32x4_ceil(LiftoffRegister dst,
                                       LiftoffRegister src) {
  // A true result tells the caller not to emit its C-call fallback, which it
  // generates only when an emitter returns false.
  constexpr bool kNoCFallbackNeeded = true;
  bailout(kSimd, "f32x4.ceil");
  return kNoCFallbackNeeded;
}
// Placeholder for f32x4.floor: no instructions are emitted; a kSimd bailout
// is flagged instead.
bool LiftoffAssembler::emit_f32x4_floor(LiftoffRegister dst,
                                        LiftoffRegister src) {
  // A true result tells the caller not to emit its C-call fallback, which it
  // generates only when an emitter returns false.
  constexpr bool kNoCFallbackNeeded = true;
  bailout(kSimd, "f32x4.floor");
  return kNoCFallbackNeeded;
}
// Placeholder for f32x4.trunc: no instructions are emitted; a kSimd bailout
// is flagged instead.
bool LiftoffAssembler::emit_f32x4_trunc(LiftoffRegister dst,
                                        LiftoffRegister src) {
  // A true result tells the caller not to emit its C-call fallback, which it
  // generates only when an emitter returns false.
  constexpr bool kNoCFallbackNeeded = true;
  bailout(kSimd, "f32x4.trunc");
  return kNoCFallbackNeeded;
}
// Placeholder for f32x4.nearest_int: no instructions are emitted; a kSimd
// bailout is flagged instead.
bool LiftoffAssembler::emit_f32x4_nearest_int(LiftoffRegister dst,
                                              LiftoffRegister src) {
  // A true result tells the caller not to emit its C-call fallback, which it
  // generates only when an emitter returns false.
  constexpr bool kNoCFallbackNeeded = true;
  bailout(kSimd, "f32x4.nearest_int");
  return kNoCFallbackNeeded;
}
void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Fadd(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());

View File

@ -3730,6 +3730,34 @@ void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst,
Sqrtps(dst.fp(), src.fp());
}
// Emits f32x4.ceil: Roundps from src's FP register into dst's FP register
// using rounding mode kRoundUp. Always returns true.
bool LiftoffAssembler::emit_f32x4_ceil(LiftoffRegister dst,
                                       LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  const auto result_reg = dst.fp();
  const auto input_reg = src.fp();
  Roundps(result_reg, input_reg, kRoundUp);
  return true;
}
// Emits f32x4.floor: Roundps from src's FP register into dst's FP register
// using rounding mode kRoundDown. Always returns true.
bool LiftoffAssembler::emit_f32x4_floor(LiftoffRegister dst,
                                        LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  const auto result_reg = dst.fp();
  const auto input_reg = src.fp();
  Roundps(result_reg, input_reg, kRoundDown);
  return true;
}
// Emits f32x4.trunc: Roundps from src's FP register into dst's FP register
// using rounding mode kRoundToZero. Always returns true.
bool LiftoffAssembler::emit_f32x4_trunc(LiftoffRegister dst,
                                        LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  const auto result_reg = dst.fp();
  const auto input_reg = src.fp();
  Roundps(result_reg, input_reg, kRoundToZero);
  return true;
}
// Emits f32x4.nearest_int: Roundps from src's FP register into dst's FP
// register using rounding mode kRoundToNearest. Always returns true.
bool LiftoffAssembler::emit_f32x4_nearest_int(LiftoffRegister dst,
                                              LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  const auto result_reg = dst.fp();
  const auto input_reg = src.fp();
  Roundps(result_reg, input_reg, kRoundToNearest);
  return true;
}
void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddps, &Assembler::addps>(
@ -3858,6 +3886,34 @@ void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst,
Sqrtpd(dst.fp(), src.fp());
}
// Emits f64x2.ceil: Roundpd from src's FP register into dst's FP register
// using rounding mode kRoundUp. Always returns true.
bool LiftoffAssembler::emit_f64x2_ceil(LiftoffRegister dst,
                                       LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  const auto result_reg = dst.fp();
  const auto input_reg = src.fp();
  Roundpd(result_reg, input_reg, kRoundUp);
  return true;
}
// Emits f64x2.floor: Roundpd from src's FP register into dst's FP register
// using rounding mode kRoundDown. Always returns true.
bool LiftoffAssembler::emit_f64x2_floor(LiftoffRegister dst,
                                        LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  const auto result_reg = dst.fp();
  const auto input_reg = src.fp();
  Roundpd(result_reg, input_reg, kRoundDown);
  return true;
}
// Emits f64x2.trunc: Roundpd from src's FP register into dst's FP register
// using rounding mode kRoundToZero. Always returns true.
bool LiftoffAssembler::emit_f64x2_trunc(LiftoffRegister dst,
                                        LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  const auto result_reg = dst.fp();
  const auto input_reg = src.fp();
  Roundpd(result_reg, input_reg, kRoundToZero);
  return true;
}
// Emits f64x2.nearest_int: Roundpd from src's FP register into dst's FP
// register using rounding mode kRoundToNearest. Always returns true.
bool LiftoffAssembler::emit_f64x2_nearest_int(LiftoffRegister dst,
                                              LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  const auto result_reg = dst.fp();
  const auto input_reg = src.fp();
  Roundpd(result_reg, input_reg, kRoundToNearest);
  return true;
}
void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddpd, &Assembler::addpd>(

View File

@ -970,6 +970,10 @@ class LiftoffAssembler : public TurboAssembler {
inline void emit_f32x4_abs(LiftoffRegister dst, LiftoffRegister src);
inline void emit_f32x4_neg(LiftoffRegister dst, LiftoffRegister src);
inline void emit_f32x4_sqrt(LiftoffRegister dst, LiftoffRegister src);
// f32x4 rounding emitters (ceil, floor, trunc, nearest_int). Unlike the void
// emitters around them these return bool: the Liftoff compiler generates a
// C-call fallback for the operation when an emitter returns false.
inline bool emit_f32x4_ceil(LiftoffRegister dst, LiftoffRegister src);
inline bool emit_f32x4_floor(LiftoffRegister dst, LiftoffRegister src);
inline bool emit_f32x4_trunc(LiftoffRegister dst, LiftoffRegister src);
inline bool emit_f32x4_nearest_int(LiftoffRegister dst, LiftoffRegister src);
inline void emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
@ -989,6 +993,10 @@ class LiftoffAssembler : public TurboAssembler {
inline void emit_f64x2_abs(LiftoffRegister dst, LiftoffRegister src);
inline void emit_f64x2_neg(LiftoffRegister dst, LiftoffRegister src);
inline void emit_f64x2_sqrt(LiftoffRegister dst, LiftoffRegister src);
// f64x2 rounding emitters (ceil, floor, trunc, nearest_int). Unlike the void
// emitters around them these return bool: the Liftoff compiler generates a
// C-call fallback for the operation when an emitter returns false.
inline bool emit_f64x2_ceil(LiftoffRegister dst, LiftoffRegister src);
inline bool emit_f64x2_floor(LiftoffRegister dst, LiftoffRegister src);
inline bool emit_f64x2_trunc(LiftoffRegister dst, LiftoffRegister src);
inline bool emit_f64x2_nearest_int(LiftoffRegister dst, LiftoffRegister src);
inline void emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,

View File

@ -7,6 +7,7 @@
#include "src/base/optional.h"
#include "src/codegen/assembler-inl.h"
// TODO(clemensb): Remove dependences on compiler stuff.
#include "src/codegen/external-reference.h"
#include "src/codegen/interface-descriptors.h"
#include "src/codegen/machine-type.h"
#include "src/codegen/macro-assembler-inl.h"
@ -2498,6 +2499,21 @@ class LiftoffCompiler {
}
}
// Compiles a unary S128 floating-point rounding operation. Pops the operand,
// asks the assembler emitter {emit_fn} to generate code, and, if the emitter
// returns false, generates a call to the C function referenced by {ext_ref}
// instead. The result register is pushed in either case.
void EmitSimdFloatRoundingOpWithCFallback(
    bool (LiftoffAssembler::*emit_fn)(LiftoffRegister, LiftoffRegister),
    ExternalReference (*ext_ref)()) {
  static constexpr RegClass kS128RegClass = reg_class_for(kWasmS128);
  LiftoffRegister input = __ PopToRegister();
  // NOTE(review): {input} is presumably the "try first" list, letting the
  // result reuse the operand's register; confirm against GetUnusedRegister.
  LiftoffRegister output = __ GetUnusedRegister(kS128RegClass, {input}, {});
  const bool emitted_inline = (asm_.*emit_fn)(output, input);
  if (!emitted_inline) {
    // Return v128 via stack for ARM.
    // NOTE(review): presumably (0 returns, 1 parameter), with the S128 result
    // passed as the out-argument type; confirm against FunctionSig.
    ValueType c_call_params[] = {kWasmS128};
    FunctionSig c_call_sig(0, 1, c_call_params);
    GenerateCCall(&output, &c_call_sig, kWasmS128, &input, ext_ref());
  }
  __ PushRegister(kWasmS128, output);
}
void SimdOp(FullDecoder* decoder, WasmOpcode opcode, Vector<Value> args,
Value* result) {
if (!CpuFeatures::SupportsWasmSimd128()) {
@ -2764,6 +2780,22 @@ class LiftoffCompiler {
return EmitUnOp<kS128, kS128>(&LiftoffAssembler::emit_f32x4_neg);
case wasm::kExprF32x4Sqrt:
return EmitUnOp<kS128, kS128>(&LiftoffAssembler::emit_f32x4_sqrt);
case wasm::kExprF32x4Ceil:
return EmitSimdFloatRoundingOpWithCFallback(
&LiftoffAssembler::emit_f32x4_ceil,
&ExternalReference::wasm_f32x4_ceil);
case wasm::kExprF32x4Floor:
return EmitSimdFloatRoundingOpWithCFallback(
&LiftoffAssembler::emit_f32x4_floor,
ExternalReference::wasm_f32x4_floor);
case wasm::kExprF32x4Trunc:
return EmitSimdFloatRoundingOpWithCFallback(
&LiftoffAssembler::emit_f32x4_trunc,
ExternalReference::wasm_f32x4_trunc);
case wasm::kExprF32x4NearestInt:
return EmitSimdFloatRoundingOpWithCFallback(
&LiftoffAssembler::emit_f32x4_nearest_int,
ExternalReference::wasm_f32x4_nearest_int);
case wasm::kExprF32x4Add:
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f32x4_add);
case wasm::kExprF32x4Sub:
@ -2786,6 +2818,22 @@ class LiftoffCompiler {
return EmitUnOp<kS128, kS128>(&LiftoffAssembler::emit_f64x2_neg);
case wasm::kExprF64x2Sqrt:
return EmitUnOp<kS128, kS128>(&LiftoffAssembler::emit_f64x2_sqrt);
case wasm::kExprF64x2Ceil:
return EmitSimdFloatRoundingOpWithCFallback(
&LiftoffAssembler::emit_f64x2_ceil,
&ExternalReference::wasm_f64x2_ceil);
case wasm::kExprF64x2Floor:
return EmitSimdFloatRoundingOpWithCFallback(
&LiftoffAssembler::emit_f64x2_floor,
ExternalReference::wasm_f64x2_floor);
case wasm::kExprF64x2Trunc:
return EmitSimdFloatRoundingOpWithCFallback(
&LiftoffAssembler::emit_f64x2_trunc,
ExternalReference::wasm_f64x2_trunc);
case wasm::kExprF64x2NearestInt:
return EmitSimdFloatRoundingOpWithCFallback(
&LiftoffAssembler::emit_f64x2_nearest_int,
ExternalReference::wasm_f64x2_nearest_int);
case wasm::kExprF64x2Add:
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f64x2_add);
case wasm::kExprF64x2Sub:

View File

@ -3324,6 +3324,34 @@ void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst,
Sqrtps(dst.fp(), src.fp());
}
// f32x4.ceil: rounds with Roundps in kRoundUp mode, reading src's FP register
// and writing dst's. Unconditionally reports success.
bool LiftoffAssembler::emit_f32x4_ceil(LiftoffRegister dst,
                                       LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  const auto out_reg = dst.fp();
  const auto in_reg = src.fp();
  Roundps(out_reg, in_reg, kRoundUp);
  return true;
}
// f32x4.floor: rounds with Roundps in kRoundDown mode, reading src's FP
// register and writing dst's. Unconditionally reports success.
bool LiftoffAssembler::emit_f32x4_floor(LiftoffRegister dst,
                                        LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  const auto out_reg = dst.fp();
  const auto in_reg = src.fp();
  Roundps(out_reg, in_reg, kRoundDown);
  return true;
}
// f32x4.trunc: rounds with Roundps in kRoundToZero mode, reading src's FP
// register and writing dst's. Unconditionally reports success.
bool LiftoffAssembler::emit_f32x4_trunc(LiftoffRegister dst,
                                        LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  const auto out_reg = dst.fp();
  const auto in_reg = src.fp();
  Roundps(out_reg, in_reg, kRoundToZero);
  return true;
}
// f32x4.nearest_int: rounds with Roundps in kRoundToNearest mode, reading
// src's FP register and writing dst's. Unconditionally reports success.
bool LiftoffAssembler::emit_f32x4_nearest_int(LiftoffRegister dst,
                                              LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  const auto out_reg = dst.fp();
  const auto in_reg = src.fp();
  Roundps(out_reg, in_reg, kRoundToNearest);
  return true;
}
void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddps, &Assembler::addps>(
@ -3452,6 +3480,34 @@ void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst,
Sqrtpd(dst.fp(), src.fp());
}
// f64x2.ceil: rounds with Roundpd in kRoundUp mode, reading src's FP register
// and writing dst's. Unconditionally reports success.
bool LiftoffAssembler::emit_f64x2_ceil(LiftoffRegister dst,
                                       LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  const auto out_reg = dst.fp();
  const auto in_reg = src.fp();
  Roundpd(out_reg, in_reg, kRoundUp);
  return true;
}
// f64x2.floor: rounds with Roundpd in kRoundDown mode, reading src's FP
// register and writing dst's. Unconditionally reports success.
bool LiftoffAssembler::emit_f64x2_floor(LiftoffRegister dst,
                                        LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  const auto out_reg = dst.fp();
  const auto in_reg = src.fp();
  Roundpd(out_reg, in_reg, kRoundDown);
  return true;
}
// f64x2.trunc: rounds with Roundpd in kRoundToZero mode, reading src's FP
// register and writing dst's. Unconditionally reports success.
bool LiftoffAssembler::emit_f64x2_trunc(LiftoffRegister dst,
                                        LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  const auto out_reg = dst.fp();
  const auto in_reg = src.fp();
  Roundpd(out_reg, in_reg, kRoundToZero);
  return true;
}
// f64x2.nearest_int: rounds with Roundpd in kRoundToNearest mode, reading
// src's FP register and writing dst's. Unconditionally reports success.
bool LiftoffAssembler::emit_f64x2_nearest_int(LiftoffRegister dst,
                                              LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  const auto out_reg = dst.fp();
  const auto in_reg = src.fp();
  Roundpd(out_reg, in_reg, kRoundToNearest);
  return true;
}
void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddpd, &Assembler::addpd>(