[wasm-simd][arm] Prototype f32x4.nearest
Prototype f32x4.nearest on ARM for both ARM v7 and ARM v8. ARM v8 supports vrintn; on ARM v7 we fall back to the runtime. Since ARM v8 uses vrintn, which is the same instruction used for the scalar Float32RoundTiesEven, wasm-compiler reuses the Float32RoundTiesEven support check. Bug: v8:10553 Change-Id: I066b8c5f10fd86294afe1c530c516493deeb7b53 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2258037 Reviewed-by: Jakob Kummerow <jkummerow@chromium.org> Reviewed-by: Deepti Gandluri <gdeepti@chromium.org> Commit-Queue: Zhi An Ng <zhin@chromium.org> Cr-Commit-Position: refs/heads/master@{#68526}
This commit is contained in:
parent
e19c945b22
commit
f332380e47
@ -3892,7 +3892,18 @@ void Assembler::vcvt_u32_f32(QwNeonRegister dst, QwNeonRegister src) {
|
||||
emit(EncodeNeonVCVT(U32, dst, F32, src));
|
||||
}
|
||||
|
||||
enum UnaryOp { VMVN, VSWP, VABS, VABSF, VNEG, VNEGF, VRINTM, VRINTP, VRINTZ };
|
||||
// Selects which NEON unary operation EncodeNeonUnaryOp encodes; each
// enumerator is mapped to its opcode bits in that function's switch.
enum UnaryOp {
|
||||
VMVN,
|
||||
VSWP,
|
||||
VABS,
|
||||
VABSF,
|
||||
VNEG,
|
||||
VNEGF,
|
||||
VRINTM,
|
||||
// Round to nearest; emitted by Assembler::vrintn.
VRINTN,
|
||||
VRINTP,
|
||||
VRINTZ
|
||||
};
|
||||
|
||||
static Instr EncodeNeonUnaryOp(UnaryOp op, NeonRegType reg_type, NeonSize size,
|
||||
int dst_code, int src_code) {
|
||||
@ -3923,6 +3934,9 @@ static Instr EncodeNeonUnaryOp(UnaryOp op, NeonRegType reg_type, NeonSize size,
|
||||
case VRINTM:
|
||||
op_encoding = B17 | 0xD * B7;
|
||||
break;
|
||||
case VRINTN:
|
||||
op_encoding = B17 | 0x8 * B7;
|
||||
break;
|
||||
case VRINTP:
|
||||
op_encoding = B17 | 0xF * B7;
|
||||
break;
|
||||
@ -4592,6 +4606,14 @@ void Assembler::vrintm(NeonDataType dt, const QwNeonRegister dst,
|
||||
emit(EncodeNeonUnaryOp(VRINTM, NEON_Q, NeonSize(dt), dst.code(), src.code()));
|
||||
}
|
||||
|
||||
// Emits the Q-register (NEON_Q) form of vrintn: the VRINTN encoding produced
// by EncodeNeonUnaryOp, with the element size derived from |dt| and the
// register codes of |dst| and |src|. ARMv8 must be enabled (checked by the
// DCHECK below).
void Assembler::vrintn(NeonDataType dt, const QwNeonRegister dst,
|
||||
const QwNeonRegister src) {
|
||||
// SIMD vector round floating-point to integer to Nearest.
|
||||
// See ARM DDI 0487F.b, F6-5497.
|
||||
DCHECK(IsEnabled(ARMv8));
|
||||
emit(EncodeNeonUnaryOp(VRINTN, NEON_Q, NeonSize(dt), dst.code(), src.code()));
|
||||
}
|
||||
|
||||
void Assembler::vrintp(NeonDataType dt, const QwNeonRegister dst,
|
||||
const QwNeonRegister src) {
|
||||
// SIMD vector round floating-point to integer towards +Infinity.
|
||||
|
@ -912,6 +912,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
|
||||
// ARMv8 rounding instructions (NEON).
|
||||
void vrintm(NeonDataType dt, const QwNeonRegister dst,
|
||||
const QwNeonRegister src);
|
||||
void vrintn(NeonDataType dt, const QwNeonRegister dst,
|
||||
const QwNeonRegister src);
|
||||
void vrintp(NeonDataType dt, const QwNeonRegister dst,
|
||||
const QwNeonRegister src);
|
||||
void vrintz(NeonDataType dt, const QwNeonRegister dst,
|
||||
|
@ -300,6 +300,7 @@ FUNCTION_REFERENCE(wasm_word64_ror, wasm::word64_ror_wrapper)
|
||||
FUNCTION_REFERENCE(wasm_f32x4_ceil, wasm::f32x4_ceil_wrapper)
|
||||
FUNCTION_REFERENCE(wasm_f32x4_floor, wasm::f32x4_floor_wrapper)
|
||||
FUNCTION_REFERENCE(wasm_f32x4_trunc, wasm::f32x4_trunc_wrapper)
|
||||
FUNCTION_REFERENCE(wasm_f32x4_nearest_int, wasm::f32x4_nearest_int_wrapper)
|
||||
FUNCTION_REFERENCE(wasm_memory_init, wasm::memory_init_wrapper)
|
||||
FUNCTION_REFERENCE(wasm_memory_copy, wasm::memory_copy_wrapper)
|
||||
FUNCTION_REFERENCE(wasm_memory_fill, wasm::memory_fill_wrapper)
|
||||
|
@ -209,6 +209,7 @@ class StatsCounter;
|
||||
V(wasm_f32x4_ceil, "wasm::f32x4_ceil_wrapper") \
|
||||
V(wasm_f32x4_floor, "wasm::f32x4_floor_wrapper") \
|
||||
V(wasm_f32x4_trunc, "wasm::f32x4_trunc_wrapper") \
|
||||
V(wasm_f32x4_nearest_int, "wasm::f32x4_nearest_int_wrapper") \
|
||||
V(wasm_memory_init, "wasm::memory_init") \
|
||||
V(wasm_memory_copy, "wasm::memory_copy") \
|
||||
V(wasm_memory_fill, "wasm::memory_fill") \
|
||||
|
@ -1506,7 +1506,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
}
|
||||
case kArmVrintnF32: {
|
||||
CpuFeatureScope scope(tasm(), ARMv8);
|
||||
__ vrintn(i.OutputFloatRegister(), i.InputFloatRegister(0));
|
||||
if (instr->InputAt(0)->IsSimd128Register()) {
|
||||
__ vrintn(NeonS32, i.OutputSimd128Register(),
|
||||
i.InputSimd128Register(0));
|
||||
} else {
|
||||
__ vrintn(i.OutputFloatRegister(), i.InputFloatRegister(0));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kArmVrintnF64: {
|
||||
|
@ -1498,7 +1498,8 @@ void InstructionSelector::VisitUint32Mod(Node* node) {
|
||||
V(Float64RoundTiesEven, kArmVrintnF64) \
|
||||
V(F32x4Ceil, kArmVrintpF32) \
|
||||
V(F32x4Floor, kArmVrintmF32) \
|
||||
V(F32x4Trunc, kArmVrintzF32)
|
||||
V(F32x4Trunc, kArmVrintzF32) \
|
||||
V(F32x4NearestInt, kArmVrintnF32)
|
||||
|
||||
#define RRR_OP_LIST(V) \
|
||||
V(Int32MulHigh, kArmSmmul) \
|
||||
|
@ -2696,12 +2696,12 @@ void InstructionSelector::VisitF64x2Pmax(Node* node) { UNIMPLEMENTED(); }
|
||||
void InstructionSelector::VisitF32x4Ceil(Node* node) { UNIMPLEMENTED(); }
|
||||
void InstructionSelector::VisitF32x4Floor(Node* node) { UNIMPLEMENTED(); }
|
||||
void InstructionSelector::VisitF32x4Trunc(Node* node) { UNIMPLEMENTED(); }
|
||||
void InstructionSelector::VisitF32x4NearestInt(Node* node) { UNIMPLEMENTED(); }
|
||||
#endif // !V8_TARGET_ARCH_ARM
|
||||
void InstructionSelector::VisitF64x2Ceil(Node* node) { UNIMPLEMENTED(); }
|
||||
void InstructionSelector::VisitF64x2Floor(Node* node) { UNIMPLEMENTED(); }
|
||||
void InstructionSelector::VisitF64x2Trunc(Node* node) { UNIMPLEMENTED(); }
|
||||
void InstructionSelector::VisitF64x2NearestInt(Node* node) { UNIMPLEMENTED(); }
|
||||
void InstructionSelector::VisitF32x4NearestInt(Node* node) { UNIMPLEMENTED(); }
|
||||
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_S390X
|
||||
// && !V8_TARGET_ARCH_IA32
|
||||
|
||||
|
@ -4058,6 +4058,12 @@ Node* WasmGraphBuilder::BuildF32x4Trunc(Node* input) {
|
||||
return BuildCFuncInstruction(ref, type, input);
|
||||
}
|
||||
|
||||
// Fallback lowering for f32x4.nearest, used by SimdOp when
// Float32RoundTiesEven is not supported by the machine: builds a C function
// call (via BuildCFuncInstruction) to the wasm_f32x4_nearest_int external
// reference, passing MachineType::Simd128() as the type and |input| as the
// operand. Returns the node produced by BuildCFuncInstruction.
Node* WasmGraphBuilder::BuildF32x4NearestInt(Node* input) {
|
||||
MachineType type = MachineType::Simd128();
|
||||
ExternalReference ref = ExternalReference::wasm_f32x4_nearest_int();
|
||||
return BuildCFuncInstruction(ref, type, input);
|
||||
}
|
||||
|
||||
// Prints |node| as "#<id>:<mnemonic>", where <id> is the node's id and
// <mnemonic> is the mnemonic of the node's operator.
void WasmGraphBuilder::PrintDebugName(Node* node) {
|
||||
PrintF("#%d:%s", node->id(), node->op()->mnemonic());
|
||||
}
|
||||
@ -4315,6 +4321,10 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
|
||||
return BuildF32x4Trunc(inputs[0]);
|
||||
return graph()->NewNode(mcgraph()->machine()->F32x4Trunc(), inputs[0]);
|
||||
case wasm::kExprF32x4NearestInt:
|
||||
// Architecture support for F32x4NearestInt and Float32RoundTiesEven is
|
||||
// the same.
|
||||
if (!mcgraph()->machine()->Float32RoundTiesEven().IsSupported())
|
||||
return BuildF32x4NearestInt(inputs[0]);
|
||||
return graph()->NewNode(mcgraph()->machine()->F32x4NearestInt(),
|
||||
inputs[0]);
|
||||
case wasm::kExprI64x2Splat:
|
||||
|
@ -560,6 +560,7 @@ class WasmGraphBuilder {
|
||||
Node* BuildF32x4Ceil(Node* input);
|
||||
Node* BuildF32x4Floor(Node* input);
|
||||
Node* BuildF32x4Trunc(Node* input);
|
||||
Node* BuildF32x4NearestInt(Node* input);
|
||||
|
||||
void BuildEncodeException32BitValue(Node* values_array, uint32_t* index,
|
||||
Node* value);
|
||||
|
@ -2275,10 +2275,17 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
|
||||
SNPrintF(out_buffer_ + out_buffer_pos_, "%s.%c%i d%d, q%d", name,
|
||||
type, size, Vd, Vm);
|
||||
} else if (instr->Bits(17, 16) == 0x2 && instr->Bit(10) == 1) {
|
||||
// NEON vrintm, vrintp, vrintz
|
||||
// NEON vrintm, vrintn, vrintp, vrintz.
|
||||
bool dp_op = instr->Bit(6) == 0;
|
||||
int rounding_mode = instr->Bits(9, 7);
|
||||
switch (rounding_mode) {
|
||||
case 0:
|
||||
if (dp_op) {
|
||||
Format(instr, "vrintn.f32 'Dd, 'Dm");
|
||||
} else {
|
||||
Format(instr, "vrintn.f32 'Qd, 'Qm");
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
if (dp_op) {
|
||||
Format(instr, "vrintz.f32 'Dd, 'Dm");
|
||||
|
@ -5451,6 +5451,9 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
||||
int rounding_mode = instr->Bits(9, 7);
|
||||
float (*fproundint)(float) = nullptr;
|
||||
switch (rounding_mode) {
|
||||
case 0:
|
||||
fproundint = &nearbyintf;
|
||||
break;
|
||||
case 3:
|
||||
fproundint = &truncf;
|
||||
break;
|
||||
|
@ -423,6 +423,10 @@ void f32x4_trunc_wrapper(Address data) {
|
||||
simd_float_round_wrapper<float, &truncf>(data);
|
||||
}
|
||||
|
||||
// C entry point registered as the wasm_f32x4_nearest_int external reference.
// Forwards |data| to simd_float_round_wrapper instantiated for float with
// nearbyintf as the rounding function.
// NOTE(review): presumably |data| addresses the four f32 lanes, which are
// rounded in place — confirm against simd_float_round_wrapper.
// NOTE(review): nearbyintf rounds according to the current floating-point
// rounding mode; this assumes the default (to nearest, ties to even) is active.
void f32x4_nearest_int_wrapper(Address data) {
|
||||
simd_float_round_wrapper<float, &nearbyintf>(data);
|
||||
}
|
||||
|
||||
namespace {
|
||||
class ThreadNotInWasmScope {
|
||||
// Asan on Windows triggers exceptions to allocate shadow memory lazily. When
|
||||
|
@ -85,6 +85,8 @@ V8_EXPORT_PRIVATE void f32x4_floor_wrapper(Address data);
|
||||
|
||||
V8_EXPORT_PRIVATE void f32x4_trunc_wrapper(Address data);
|
||||
|
||||
V8_EXPORT_PRIVATE void f32x4_nearest_int_wrapper(Address data);
|
||||
|
||||
// The return type is {int32_t} instead of {bool} to enforce the compiler to
|
||||
// zero-extend the result in the return register.
|
||||
int32_t memory_init_wrapper(Address data);
|
||||
|
@ -919,6 +919,7 @@ TEST(ARMv8_vrintX_disasm) {
|
||||
|
||||
// Advanced SIMD
|
||||
COMPARE(vrintm(NeonS32, q0, q3), "f3ba06c6 vrintm.f32 q0, q3");
|
||||
COMPARE(vrintn(NeonS32, q0, q3), "f3ba0446 vrintn.f32 q0, q3");
|
||||
COMPARE(vrintp(NeonS32, q0, q3), "f3ba07c6 vrintp.f32 q0, q3");
|
||||
COMPARE(vrintz(NeonS32, q0, q3), "f3ba05c6 vrintz.f32 q0, q3");
|
||||
}
|
||||
|
@ -708,15 +708,11 @@ WASM_SIMD_TEST_NO_LOWERING(F32x4Trunc) {
|
||||
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4Trunc, truncf, true);
|
||||
}
|
||||
|
||||
// TODO(zhin): Temporary convoluted way to exclude running these tests on ARM as
|
||||
// we are implementing each opcode one at a time.
|
||||
#if !V8_TARGET_ARCH_ARM
|
||||
// Exercises kExprF32x4NearestInt through RunF32x4UnOpTest with the
// wasm_simd_post_mvp flag scoped on for the duration of the test.
// NOTE(review): nearbyintf is presumably the scalar function the results are
// compared against — confirm in RunF32x4UnOpTest.
WASM_SIMD_TEST_NO_LOWERING(F32x4NearestInt) {
|
||||
FLAG_SCOPE(wasm_simd_post_mvp);
|
||||
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4NearestInt, nearbyintf,
|
||||
true);
|
||||
}
|
||||
#endif // !V8_TARGET_ARCH_ARM
|
||||
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X ||
|
||||
// V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user