[wasm-simd][arm] Prototype f32x4.nearest

Prototype f32x4.nearest on ARM for both ARM v7 and ARM v8. ARM v8 has
support for vrintn, and for ARM v7 we fall back to runtime.

Since ARM v8 uses vrintn, which is the same instruction used for
F32RoundTiesEven (scalar), wasm-compiler reuses the Float32RoundTiesEven
check.

Bug: v8:10553
Change-Id: I066b8c5f10fd86294afe1c530c516493deeb7b53
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2258037
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#68526}
This commit is contained in:
Ng Zhi An 2020-06-23 17:23:42 -07:00 committed by Commit Bot
parent e19c945b22
commit f332380e47
15 changed files with 65 additions and 9 deletions

View File

@ -3892,7 +3892,18 @@ void Assembler::vcvt_u32_f32(QwNeonRegister dst, QwNeonRegister src) {
emit(EncodeNeonVCVT(U32, dst, F32, src));
}
enum UnaryOp { VMVN, VSWP, VABS, VABSF, VNEG, VNEGF, VRINTM, VRINTP, VRINTZ };
// Selector for the unary NEON operations that EncodeNeonUnaryOp can encode.
// The VRINT* entries are the ARMv8 NEON rounding instructions; VRINTN is the
// "round to nearest" variant emitted by Assembler::vrintn.
enum UnaryOp {
VMVN,
VSWP,
VABS,
VABSF,
VNEG,
VNEGF,
VRINTM,
VRINTN,
VRINTP,
VRINTZ
};
static Instr EncodeNeonUnaryOp(UnaryOp op, NeonRegType reg_type, NeonSize size,
int dst_code, int src_code) {
@ -3923,6 +3934,9 @@ static Instr EncodeNeonUnaryOp(UnaryOp op, NeonRegType reg_type, NeonSize size,
case VRINTM:
op_encoding = B17 | 0xD * B7;
break;
case VRINTN:
op_encoding = B17 | 0x8 * B7;
break;
case VRINTP:
op_encoding = B17 | 0xF * B7;
break;
@ -4592,6 +4606,14 @@ void Assembler::vrintm(NeonDataType dt, const QwNeonRegister dst,
emit(EncodeNeonUnaryOp(VRINTM, NEON_Q, NeonSize(dt), dst.code(), src.code()));
}
void Assembler::vrintn(NeonDataType dt, const QwNeonRegister dst,
                       const QwNeonRegister src) {
  // Advanced SIMD VRINTN on quad registers: rounds floating-point lanes to
  // integral values using the "to Nearest" rounding mode.
  // Reference: ARM DDI 0487F.b, F6-5497.
  // The instruction is only available when ARMv8 support has been enabled.
  DCHECK(IsEnabled(ARMv8));
  const Instr instruction =
      EncodeNeonUnaryOp(VRINTN, NEON_Q, NeonSize(dt), dst.code(), src.code());
  emit(instruction);
}
void Assembler::vrintp(NeonDataType dt, const QwNeonRegister dst,
const QwNeonRegister src) {
// SIMD vector round floating-point to integer towards +Infinity.

View File

@ -912,6 +912,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
// ARMv8 rounding instructions (NEON).
void vrintm(NeonDataType dt, const QwNeonRegister dst,
const QwNeonRegister src);
void vrintn(NeonDataType dt, const QwNeonRegister dst,
const QwNeonRegister src);
void vrintp(NeonDataType dt, const QwNeonRegister dst,
const QwNeonRegister src);
void vrintz(NeonDataType dt, const QwNeonRegister dst,

View File

@ -300,6 +300,7 @@ FUNCTION_REFERENCE(wasm_word64_ror, wasm::word64_ror_wrapper)
FUNCTION_REFERENCE(wasm_f32x4_ceil, wasm::f32x4_ceil_wrapper)
FUNCTION_REFERENCE(wasm_f32x4_floor, wasm::f32x4_floor_wrapper)
FUNCTION_REFERENCE(wasm_f32x4_trunc, wasm::f32x4_trunc_wrapper)
FUNCTION_REFERENCE(wasm_f32x4_nearest_int, wasm::f32x4_nearest_int_wrapper)
FUNCTION_REFERENCE(wasm_memory_init, wasm::memory_init_wrapper)
FUNCTION_REFERENCE(wasm_memory_copy, wasm::memory_copy_wrapper)
FUNCTION_REFERENCE(wasm_memory_fill, wasm::memory_fill_wrapper)

View File

@ -209,6 +209,7 @@ class StatsCounter;
V(wasm_f32x4_ceil, "wasm::f32x4_ceil_wrapper") \
V(wasm_f32x4_floor, "wasm::f32x4_floor_wrapper") \
V(wasm_f32x4_trunc, "wasm::f32x4_trunc_wrapper") \
V(wasm_f32x4_nearest_int, "wasm::f32x4_nearest_int_wrapper") \
V(wasm_memory_init, "wasm::memory_init") \
V(wasm_memory_copy, "wasm::memory_copy") \
V(wasm_memory_fill, "wasm::memory_fill") \

View File

@ -1506,7 +1506,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmVrintnF32: {
CpuFeatureScope scope(tasm(), ARMv8);
__ vrintn(i.OutputFloatRegister(), i.InputFloatRegister(0));
if (instr->InputAt(0)->IsSimd128Register()) {
__ vrintn(NeonS32, i.OutputSimd128Register(),
i.InputSimd128Register(0));
} else {
__ vrintn(i.OutputFloatRegister(), i.InputFloatRegister(0));
}
break;
}
case kArmVrintnF64: {

View File

@ -1498,7 +1498,8 @@ void InstructionSelector::VisitUint32Mod(Node* node) {
V(Float64RoundTiesEven, kArmVrintnF64) \
V(F32x4Ceil, kArmVrintpF32) \
V(F32x4Floor, kArmVrintmF32) \
V(F32x4Trunc, kArmVrintzF32)
V(F32x4Trunc, kArmVrintzF32) \
V(F32x4NearestInt, kArmVrintnF32)
#define RRR_OP_LIST(V) \
V(Int32MulHigh, kArmSmmul) \

View File

@ -2696,12 +2696,12 @@ void InstructionSelector::VisitF64x2Pmax(Node* node) { UNIMPLEMENTED(); }
// Placeholder visitors for the F32x4 rounding operations. Each one simply hits
// UNIMPLEMENTED(). They sit inside the preprocessor region closed by the
// `#endif  // !V8_TARGET_ARCH_ARM` that follows, i.e. they are not compiled for
// ARM, which selects real instructions for these nodes.
void InstructionSelector::VisitF32x4Ceil(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Floor(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Trunc(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4NearestInt(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM
// Placeholder visitors for the F64x2 rounding operations. Each one simply hits
// UNIMPLEMENTED(). They belong to the outer preprocessor region (closed further
// below) that excludes X64, ARM64, S390X and IA32.
void InstructionSelector::VisitF64x2Ceil(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Floor(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Trunc(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2NearestInt(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4NearestInt(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_S390X
// && !V8_TARGET_ARCH_IA32

View File

@ -4058,6 +4058,12 @@ Node* WasmGraphBuilder::BuildF32x4Trunc(Node* input) {
return BuildCFuncInstruction(ref, type, input);
}
Node* WasmGraphBuilder::BuildF32x4NearestInt(Node* input) {
  // Builds a call to the C function behind the wasm_f32x4_nearest_int external
  // reference, passing |input| as a Simd128 value. SimdOp uses this path when
  // the machine does not support Float32RoundTiesEven.
  return BuildCFuncInstruction(ExternalReference::wasm_f32x4_nearest_int(),
                               MachineType::Simd128(), input);
}
void WasmGraphBuilder::PrintDebugName(Node* node) {
  // Prints the node as "#<id>:<operator mnemonic>".
  const auto mnemonic = node->op()->mnemonic();
  PrintF("#%d:%s", node->id(), mnemonic);
}
@ -4315,6 +4321,10 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
return BuildF32x4Trunc(inputs[0]);
return graph()->NewNode(mcgraph()->machine()->F32x4Trunc(), inputs[0]);
case wasm::kExprF32x4NearestInt:
// Architecture support for F32x4NearestInt and Float32RoundTiesEven is
// the same.
if (!mcgraph()->machine()->Float32RoundTiesEven().IsSupported())
return BuildF32x4NearestInt(inputs[0]);
return graph()->NewNode(mcgraph()->machine()->F32x4NearestInt(),
inputs[0]);
case wasm::kExprI64x2Splat:

View File

@ -560,6 +560,7 @@ class WasmGraphBuilder {
Node* BuildF32x4Ceil(Node* input);
Node* BuildF32x4Floor(Node* input);
Node* BuildF32x4Trunc(Node* input);
Node* BuildF32x4NearestInt(Node* input);
void BuildEncodeException32BitValue(Node* values_array, uint32_t* index,
Node* value);

View File

@ -2275,10 +2275,17 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
SNPrintF(out_buffer_ + out_buffer_pos_, "%s.%c%i d%d, q%d", name,
type, size, Vd, Vm);
} else if (instr->Bits(17, 16) == 0x2 && instr->Bit(10) == 1) {
// NEON vrintm, vrintp, vrintz
// NEON vrintm, vrintn, vrintp, vrintz.
bool dp_op = instr->Bit(6) == 0;
int rounding_mode = instr->Bits(9, 7);
switch (rounding_mode) {
case 0:
if (dp_op) {
Format(instr, "vrintn.f32 'Dd, 'Dm");
} else {
Format(instr, "vrintn.f32 'Qd, 'Qm");
}
break;
case 3:
if (dp_op) {
Format(instr, "vrintz.f32 'Dd, 'Dm");

View File

@ -5451,6 +5451,9 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
int rounding_mode = instr->Bits(9, 7);
float (*fproundint)(float) = nullptr;
switch (rounding_mode) {
case 0:
fproundint = &nearbyintf;
break;
case 3:
fproundint = &truncf;
break;

View File

@ -423,6 +423,10 @@ void f32x4_trunc_wrapper(Address data) {
simd_float_round_wrapper<float, &truncf>(data);
}
// C fallback for f32x4.nearest: forwards |data| to simd_float_round_wrapper,
// instantiated for float lanes with nearbyintf as the per-lane rounding
// function.
// NOTE(review): nearbyintf honours the current floating-point rounding mode;
// this presumably relies on the default round-to-nearest-even mode — confirm.
void f32x4_nearest_int_wrapper(Address data) {
simd_float_round_wrapper<float, &nearbyintf>(data);
}
namespace {
class ThreadNotInWasmScope {
// Asan on Windows triggers exceptions to allocate shadow memory lazily. When

View File

@ -85,6 +85,8 @@ V8_EXPORT_PRIVATE void f32x4_floor_wrapper(Address data);
V8_EXPORT_PRIVATE void f32x4_trunc_wrapper(Address data);
V8_EXPORT_PRIVATE void f32x4_nearest_int_wrapper(Address data);
// The return type is {int32_t} instead of {bool} to enforce the compiler to
// zero-extend the result in the return register.
int32_t memory_init_wrapper(Address data);

View File

@ -919,6 +919,7 @@ TEST(ARMv8_vrintX_disasm) {
// Advanced SIMD
COMPARE(vrintm(NeonS32, q0, q3), "f3ba06c6 vrintm.f32 q0, q3");
COMPARE(vrintn(NeonS32, q0, q3), "f3ba0446 vrintn.f32 q0, q3");
COMPARE(vrintp(NeonS32, q0, q3), "f3ba07c6 vrintp.f32 q0, q3");
COMPARE(vrintz(NeonS32, q0, q3), "f3ba05c6 vrintz.f32 q0, q3");
}

View File

@ -708,15 +708,11 @@ WASM_SIMD_TEST_NO_LOWERING(F32x4Trunc) {
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4Trunc, truncf, true);
}
// TODO(zhin): Temporary convoluted way to exclude running these tests on ARM as
// we are implementing each opcode one at a time.
#if !V8_TARGET_ARCH_ARM
// Runs the shared F32x4 unary-op test for kExprF32x4NearestInt, using
// nearbyintf as the reference function the results are compared against.
// FLAG_SCOPE(wasm_simd_post_mvp) presumably turns the post-MVP SIMD flag on for
// the duration of this test — confirm against the macro definition.
WASM_SIMD_TEST_NO_LOWERING(F32x4NearestInt) {
FLAG_SCOPE(wasm_simd_post_mvp);
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4NearestInt, nearbyintf,
true);
}
#endif // !V8_TARGET_ARCH_ARM
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X ||
// V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM