From 062ba7e78f373aa4b973869367074887cee1241d Mon Sep 17 00:00:00 2001 From: Zhi An Ng Date: Mon, 2 Nov 2020 09:16:52 +0000 Subject: [PATCH] [wasm-simd][arm64] Prototype extended pairwise additions Prototype i32x4.extadd_pairwise_i16x8_{s,u} and i16x8.extadd_pairwise_i8x16_{s,u} (names not confirmed) on ARM64 and interpreter. With a simple test case. Bug: v8:11086 Change-Id: If1ffc04e179e86ca5cc209bf9ef9d337298e3cc2 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2513872 Reviewed-by: Tobias Tebbi Reviewed-by: Bill Budge Commit-Queue: Zhi An Ng Cr-Commit-Position: refs/heads/master@{#71068} --- .../backend/arm64/code-generator-arm64.cc | 14 ++++ .../backend/arm64/instruction-codes-arm64.h | 2 + .../arm64/instruction-scheduler-arm64.cc | 2 + .../arm64/instruction-selector-arm64.cc | 25 +++++++ src/compiler/backend/instruction-selector.cc | 22 ++++++ src/compiler/machine-operator.cc | 4 ++ src/compiler/machine-operator.h | 4 ++ src/compiler/opcodes.h | 4 ++ src/compiler/wasm-compiler.cc | 12 ++++ src/utils/utils.h | 15 ++++ src/wasm/wasm-opcodes-inl.h | 3 + src/wasm/wasm-opcodes.h | 72 ++++++++++--------- test/cctest/wasm/test-run-wasm-simd.cc | 51 +++++++++++++ test/common/wasm/wasm-interpreter.cc | 27 +++++++ 14 files changed, 223 insertions(+), 34 deletions(-) diff --git a/src/compiler/backend/arm64/code-generator-arm64.cc b/src/compiler/backend/arm64/code-generator-arm64.cc index 5df21ce350..e07cfe4c50 100644 --- a/src/compiler/backend/arm64/code-generator-arm64.cc +++ b/src/compiler/backend/arm64/code-generator-arm64.cc @@ -1139,6 +1139,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kArm64Mul32: __ Mul(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1)); break; + case kArm64Saddlp: { + VectorFormat dst_f = VectorFormatFillQ(MiscField::decode(opcode)); + VectorFormat src_f = VectorFormatHalfWidthDoubleLanes(dst_f); + __ Saddlp(i.OutputSimd128Register().Format(dst_f), 
i.InputSimd128Register(0).Format(src_f)); + break; + } + case kArm64Uaddlp: { + VectorFormat dst_f = VectorFormatFillQ(MiscField::decode(opcode)); + VectorFormat src_f = VectorFormatHalfWidthDoubleLanes(dst_f); + __ Uaddlp(i.OutputSimd128Register().Format(dst_f), + i.InputSimd128Register(0).Format(src_f)); + break; + } case kArm64Smull: { if (instr->InputAt(0)->IsRegister()) { __ Smull(i.OutputRegister(), i.InputRegister32(0), diff --git a/src/compiler/backend/arm64/instruction-codes-arm64.h b/src/compiler/backend/arm64/instruction-codes-arm64.h index 0898380dbf..c80538f3a9 100644 --- a/src/compiler/backend/arm64/instruction-codes-arm64.h +++ b/src/compiler/backend/arm64/instruction-codes-arm64.h @@ -35,12 +35,14 @@ namespace compiler { V(Arm64Eor32) \ V(Arm64Eon) \ V(Arm64Eon32) \ + V(Arm64Saddlp) \ V(Arm64Sub) \ V(Arm64Sub32) \ V(Arm64Mul) \ V(Arm64Mul32) \ V(Arm64Smull) \ V(Arm64Smull2) \ + V(Arm64Uaddlp) \ V(Arm64Umull) \ V(Arm64Umull2) \ V(Arm64Madd) \ diff --git a/src/compiler/backend/arm64/instruction-scheduler-arm64.cc b/src/compiler/backend/arm64/instruction-scheduler-arm64.cc index 0a86ff2e55..9d53074042 100644 --- a/src/compiler/backend/arm64/instruction-scheduler-arm64.cc +++ b/src/compiler/backend/arm64/instruction-scheduler-arm64.cc @@ -36,12 +36,14 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArm64Eor32: case kArm64Eon: case kArm64Eon32: + case kArm64Saddlp: case kArm64Sub: case kArm64Sub32: case kArm64Mul: case kArm64Mul32: case kArm64Smull: case kArm64Smull2: + case kArm64Uaddlp: case kArm64Umull: case kArm64Umull2: case kArm64Madd: diff --git a/src/compiler/backend/arm64/instruction-selector-arm64.cc b/src/compiler/backend/arm64/instruction-selector-arm64.cc index a7634bdf78..584cfb6184 100644 --- a/src/compiler/backend/arm64/instruction-selector-arm64.cc +++ b/src/compiler/backend/arm64/instruction-selector-arm64.cc @@ -1708,6 +1708,31 @@ void InstructionSelector::VisitI64x2ExtMulHighI32x4U(Node* node) { VisitExtMul(this, 
kArm64Umull2, node, 64); } +namespace { +void VisitExtAddPairwise(InstructionSelector* selector, ArchOpcode opcode, + Node* node, int dst_lane_size) { + InstructionCode code = opcode; + code |= MiscField::encode(dst_lane_size); + VisitRR(selector, code, node); +} +} // namespace + +void InstructionSelector::VisitI32x4ExtAddPairwiseI16x8S(Node* node) { + VisitExtAddPairwise(this, kArm64Saddlp, node, 32); +} + +void InstructionSelector::VisitI32x4ExtAddPairwiseI16x8U(Node* node) { + VisitExtAddPairwise(this, kArm64Uaddlp, node, 32); +} + +void InstructionSelector::VisitI16x8ExtAddPairwiseI8x16S(Node* node) { + VisitExtAddPairwise(this, kArm64Saddlp, node, 16); +} + +void InstructionSelector::VisitI16x8ExtAddPairwiseI8x16U(Node* node) { + VisitExtAddPairwise(this, kArm64Uaddlp, node, 16); +} + void InstructionSelector::VisitInt32MulHigh(Node* node) { Arm64OperandGenerator g(this); InstructionOperand const smull_operand = g.TempRegister(); diff --git a/src/compiler/backend/instruction-selector.cc b/src/compiler/backend/instruction-selector.cc index c9ef64f976..b62cc83532 100644 --- a/src/compiler/backend/instruction-selector.cc +++ b/src/compiler/backend/instruction-selector.cc @@ -2097,6 +2097,10 @@ void InstructionSelector::VisitNode(Node* node) { return MarkAsSimd128(node), VisitI32x4ExtMulHighI16x8U(node); case IrOpcode::kI32x4SignSelect: return MarkAsSimd128(node), VisitI32x4SignSelect(node); + case IrOpcode::kI32x4ExtAddPairwiseI16x8S: + return MarkAsSimd128(node), VisitI32x4ExtAddPairwiseI16x8S(node); + case IrOpcode::kI32x4ExtAddPairwiseI16x8U: + return MarkAsSimd128(node), VisitI32x4ExtAddPairwiseI16x8U(node); case IrOpcode::kI16x8Splat: return MarkAsSimd128(node), VisitI16x8Splat(node); case IrOpcode::kI16x8ExtractLaneU: @@ -2179,6 +2183,10 @@ void InstructionSelector::VisitNode(Node* node) { return MarkAsSimd128(node), VisitI16x8ExtMulHighI8x16U(node); case IrOpcode::kI16x8SignSelect: return MarkAsSimd128(node), VisitI16x8SignSelect(node); + case 
IrOpcode::kI16x8ExtAddPairwiseI8x16S: + return MarkAsSimd128(node), VisitI16x8ExtAddPairwiseI8x16S(node); + case IrOpcode::kI16x8ExtAddPairwiseI8x16U: + return MarkAsSimd128(node), VisitI16x8ExtAddPairwiseI8x16U(node); case IrOpcode::kI8x16Splat: return MarkAsSimd128(node), VisitI8x16Splat(node); case IrOpcode::kI8x16ExtractLaneU: @@ -2772,6 +2780,20 @@ void InstructionSelector::VisitI16x8ExtMulLowI8x16U(Node* node) { void InstructionSelector::VisitI16x8ExtMulHighI8x16U(Node* node) { UNIMPLEMENTED(); } + +// TODO(v8:11086) Prototype extended pairwise add. +void InstructionSelector::VisitI32x4ExtAddPairwiseI16x8S(Node* node) { + UNIMPLEMENTED(); +} +void InstructionSelector::VisitI32x4ExtAddPairwiseI16x8U(Node* node) { + UNIMPLEMENTED(); +} +void InstructionSelector::VisitI16x8ExtAddPairwiseI8x16S(Node* node) { + UNIMPLEMENTED(); +} +void InstructionSelector::VisitI16x8ExtAddPairwiseI8x16U(Node* node) { + UNIMPLEMENTED(); +} #endif // !V8_TARGET_ARCH_ARM64 #if !V8_TARGET_ARCH_X64 diff --git a/src/compiler/machine-operator.cc b/src/compiler/machine-operator.cc index ed675253f5..e3d16d7e60 100644 --- a/src/compiler/machine-operator.cc +++ b/src/compiler/machine-operator.cc @@ -463,6 +463,8 @@ ShiftKind ShiftKindOf(Operator const* op) { V(I32x4ExtMulLowI16x8U, Operator::kCommutative, 2, 0, 1) \ V(I32x4ExtMulHighI16x8U, Operator::kCommutative, 2, 0, 1) \ V(I32x4SignSelect, Operator::kNoProperties, 3, 0, 1) \ + V(I32x4ExtAddPairwiseI16x8S, Operator::kNoProperties, 1, 0, 1) \ + V(I32x4ExtAddPairwiseI16x8U, Operator::kNoProperties, 1, 0, 1) \ V(I16x8Splat, Operator::kNoProperties, 1, 0, 1) \ V(I16x8SConvertI8x16Low, Operator::kNoProperties, 1, 0, 1) \ V(I16x8SConvertI8x16High, Operator::kNoProperties, 1, 0, 1) \ @@ -501,6 +503,8 @@ ShiftKind ShiftKindOf(Operator const* op) { V(I16x8ExtMulLowI8x16U, Operator::kCommutative, 2, 0, 1) \ V(I16x8ExtMulHighI8x16U, Operator::kCommutative, 2, 0, 1) \ V(I16x8SignSelect, Operator::kNoProperties, 3, 0, 1) \ + 
V(I16x8ExtAddPairwiseI8x16S, Operator::kNoProperties, 1, 0, 1) \ + V(I16x8ExtAddPairwiseI8x16U, Operator::kNoProperties, 1, 0, 1) \ V(I8x16Splat, Operator::kNoProperties, 1, 0, 1) \ V(I8x16Neg, Operator::kNoProperties, 1, 0, 1) \ V(I8x16Shl, Operator::kNoProperties, 2, 0, 1) \ diff --git a/src/compiler/machine-operator.h b/src/compiler/machine-operator.h index 2d42087cbd..7912c55de5 100644 --- a/src/compiler/machine-operator.h +++ b/src/compiler/machine-operator.h @@ -716,6 +716,8 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final const Operator* I32x4ExtMulLowI16x8U(); const Operator* I32x4ExtMulHighI16x8U(); const Operator* I32x4SignSelect(); + const Operator* I32x4ExtAddPairwiseI16x8S(); + const Operator* I32x4ExtAddPairwiseI16x8U(); const Operator* I16x8Splat(); const Operator* I16x8ExtractLaneU(int32_t); @@ -759,6 +761,8 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final const Operator* I16x8ExtMulLowI8x16U(); const Operator* I16x8ExtMulHighI8x16U(); const Operator* I16x8SignSelect(); + const Operator* I16x8ExtAddPairwiseI8x16S(); + const Operator* I16x8ExtAddPairwiseI8x16U(); const Operator* I8x16Splat(); const Operator* I8x16ExtractLaneU(int32_t); diff --git a/src/compiler/opcodes.h b/src/compiler/opcodes.h index c45b1701b2..4b92fe68bd 100644 --- a/src/compiler/opcodes.h +++ b/src/compiler/opcodes.h @@ -875,6 +875,8 @@ V(I32x4ExtMulLowI16x8U) \ V(I32x4ExtMulHighI16x8U) \ V(I32x4SignSelect) \ + V(I32x4ExtAddPairwiseI16x8S) \ + V(I32x4ExtAddPairwiseI16x8U) \ V(I16x8Splat) \ V(I16x8ExtractLaneU) \ V(I16x8ExtractLaneS) \ @@ -920,6 +922,8 @@ V(I16x8ExtMulLowI8x16U) \ V(I16x8ExtMulHighI8x16U) \ V(I16x8SignSelect) \ + V(I16x8ExtAddPairwiseI8x16S) \ + V(I16x8ExtAddPairwiseI8x16U) \ V(I8x16Splat) \ V(I8x16ExtractLaneU) \ V(I8x16ExtractLaneS) \ diff --git a/src/compiler/wasm-compiler.cc b/src/compiler/wasm-compiler.cc index 71224575f7..d6b7113b27 100644 --- a/src/compiler/wasm-compiler.cc +++ b/src/compiler/wasm-compiler.cc @@ -4793,6 +4793,12 @@ Node* 
WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) { case wasm::kExprI32x4SignSelect: return graph()->NewNode(mcgraph()->machine()->I32x4SignSelect(), inputs[0], inputs[1], inputs[2]); + case wasm::kExprI32x4ExtAddPairwiseI16x8S: + return graph()->NewNode(mcgraph()->machine()->I32x4ExtAddPairwiseI16x8S(), + inputs[0]); + case wasm::kExprI32x4ExtAddPairwiseI16x8U: + return graph()->NewNode(mcgraph()->machine()->I32x4ExtAddPairwiseI16x8U(), + inputs[0]); case wasm::kExprI16x8Splat: return graph()->NewNode(mcgraph()->machine()->I16x8Splat(), inputs[0]); case wasm::kExprI16x8SConvertI8x16Low: @@ -4915,6 +4921,12 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) { case wasm::kExprI16x8SignSelect: return graph()->NewNode(mcgraph()->machine()->I16x8SignSelect(), inputs[0], inputs[1], inputs[2]); + case wasm::kExprI16x8ExtAddPairwiseI8x16S: + return graph()->NewNode(mcgraph()->machine()->I16x8ExtAddPairwiseI8x16S(), + inputs[0]); + case wasm::kExprI16x8ExtAddPairwiseI8x16U: + return graph()->NewNode(mcgraph()->machine()->I16x8ExtAddPairwiseI8x16U(), + inputs[0]); case wasm::kExprI8x16Splat: return graph()->NewNode(mcgraph()->machine()->I8x16Splat(), inputs[0]); case wasm::kExprI8x16Neg: diff --git a/src/utils/utils.h b/src/utils/utils.h index 9bbea8b3c1..af8f34030f 100644 --- a/src/utils/utils.h +++ b/src/utils/utils.h @@ -217,6 +217,21 @@ Wide MultiplyLong(Narrow a, Narrow b) { return static_cast(a) * static_cast(b); } +// Add two numbers, returning a result that is twice as wide, no overflow. +// Put Wide first so we can use function template argument deduction for Narrow, +// and callers can provide only Wide. 
+template <typename Wide, typename Narrow> +Wide AddLong(Narrow a, Narrow b) { + static_assert( + std::is_integral<Narrow>::value && std::is_integral<Wide>::value, + "only integral types"); + static_assert(std::is_signed<Narrow>::value == std::is_signed<Wide>::value, + "both must have same signedness"); + static_assert(sizeof(Narrow) * 2 == sizeof(Wide), "only twice as long"); + + return static_cast<Wide>(a) + static_cast<Wide>(b); +} + // Helper macros for defining a contiguous sequence of field offset constants. // Example: (backslashes at the ends of respective lines of this multi-line // macro definition are omitted here to please the compiler) diff --git a/src/wasm/wasm-opcodes-inl.h b/src/wasm/wasm-opcodes-inl.h index 44144748c2..5e0f172bd5 100644 --- a/src/wasm/wasm-opcodes-inl.h +++ b/src/wasm/wasm-opcodes-inl.h @@ -357,6 +357,9 @@ constexpr const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) { CASE_SIMDI_OP(SignSelect, "signselect") CASE_I64x2_OP(SignSelect, "signselect") + CASE_SIGN_OP(I32x4, ExtAddPairwiseI16x8, "extadd_pairwise_i16x8") + CASE_SIGN_OP(I16x8, ExtAddPairwiseI8x16, "extadd_pairwise_i8x16") + // Atomic operations. 
CASE_OP(AtomicNotify, "atomic.notify") CASE_INT_OP(AtomicWait, "atomic.wait") diff --git a/src/wasm/wasm-opcodes.h b/src/wasm/wasm-opcodes.h index 966ec30a2a..76812446a9 100644 --- a/src/wasm/wasm-opcodes.h +++ b/src/wasm/wasm-opcodes.h @@ -471,40 +471,44 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig, V(S128Store32Lane, 0xfd5e, v_is) \ V(S128Store64Lane, 0xfd5f, v_is) -#define FOREACH_SIMD_POST_MVP_OPCODE(V) \ - V(I8x16Mul, 0xfd75, s_ss) \ - V(I8x16Popcnt, 0xfd7c, s_s) \ - V(I8x16SignSelect, 0xfd7d, s_sss) \ - V(I16x8SignSelect, 0xfd7e, s_sss) \ - V(I32x4SignSelect, 0xfd7f, s_sss) \ - V(I64x2SignSelect, 0xfd94, s_sss) \ - V(I16x8Q15MulRSatS, 0xfd9c, s_ss) \ - V(I16x8ExtMulLowI8x16S, 0xfd9a, s_ss) \ - V(I16x8ExtMulHighI8x16S, 0xfd9d, s_ss) \ - V(I16x8ExtMulLowI8x16U, 0xfd9e, s_ss) \ - V(I16x8ExtMulHighI8x16U, 0xfd9f, s_ss) \ - V(I32x4ExtMulLowI16x8S, 0xfdbb, s_ss) \ - V(I32x4ExtMulHighI16x8S, 0xfdbd, s_ss) \ - V(I32x4ExtMulLowI16x8U, 0xfdbe, s_ss) \ - V(I32x4ExtMulHighI16x8U, 0xfdbf, s_ss) \ - V(I64x2ExtMulLowI32x4S, 0xfdd2, s_ss) \ - V(I64x2ExtMulHighI32x4S, 0xfdd3, s_ss) \ - V(I64x2ExtMulLowI32x4U, 0xfdd6, s_ss) \ - V(I64x2ExtMulHighI32x4U, 0xfdd7, s_ss) \ - V(I64x2Eq, 0xfdc0, s_ss) \ - V(F32x4Qfma, 0xfdb4, s_sss) \ - V(I64x2BitMask, 0xfdc4, i_s) \ - V(I64x2SConvertI32x4Low, 0xfdc7, s_s) \ - V(I64x2SConvertI32x4High, 0xfdc8, s_s) \ - V(I64x2UConvertI32x4Low, 0xfdc9, s_s) \ - V(I64x2UConvertI32x4High, 0xfdca, s_s) \ - V(F32x4Qfms, 0xfdd4, s_sss) \ - V(F64x2Qfma, 0xfdfe, s_sss) \ - V(F64x2Qfms, 0xfdff, s_sss) \ - V(I16x8AddHoriz, 0xfdaf, s_ss) \ - V(I32x4AddHoriz, 0xfdb0, s_ss) \ - V(F32x4AddHoriz, 0xfdb2, s_ss) \ - V(F32x4RecipApprox, 0xfdb3, s_s) \ +#define FOREACH_SIMD_POST_MVP_OPCODE(V) \ + V(I8x16Mul, 0xfd75, s_ss) \ + V(I8x16Popcnt, 0xfd7c, s_s) \ + V(I8x16SignSelect, 0xfd7d, s_sss) \ + V(I16x8SignSelect, 0xfd7e, s_sss) \ + V(I32x4SignSelect, 0xfd7f, s_sss) \ + V(I64x2SignSelect, 0xfd94, s_sss) \ + V(I16x8Q15MulRSatS, 0xfd9c, s_ss) \ 
+ V(I16x8ExtMulLowI8x16S, 0xfd9a, s_ss) \ + V(I16x8ExtMulHighI8x16S, 0xfd9d, s_ss) \ + V(I16x8ExtMulLowI8x16U, 0xfd9e, s_ss) \ + V(I16x8ExtMulHighI8x16U, 0xfd9f, s_ss) \ + V(I32x4ExtMulLowI16x8S, 0xfdbb, s_ss) \ + V(I32x4ExtMulHighI16x8S, 0xfdbd, s_ss) \ + V(I32x4ExtMulLowI16x8U, 0xfdbe, s_ss) \ + V(I32x4ExtMulHighI16x8U, 0xfdbf, s_ss) \ + V(I64x2ExtMulLowI32x4S, 0xfdd2, s_ss) \ + V(I64x2ExtMulHighI32x4S, 0xfdd3, s_ss) \ + V(I64x2ExtMulLowI32x4U, 0xfdd6, s_ss) \ + V(I64x2ExtMulHighI32x4U, 0xfdd7, s_ss) \ + V(I32x4ExtAddPairwiseI16x8S, 0xfda5, s_s) \ + V(I32x4ExtAddPairwiseI16x8U, 0xfda6, s_s) \ + V(I16x8ExtAddPairwiseI8x16S, 0xfdc2, s_s) \ + V(I16x8ExtAddPairwiseI8x16U, 0xfdc3, s_s) \ + V(I64x2Eq, 0xfdc0, s_ss) \ + V(F32x4Qfma, 0xfdb4, s_sss) \ + V(I64x2BitMask, 0xfdc4, i_s) \ + V(I64x2SConvertI32x4Low, 0xfdc7, s_s) \ + V(I64x2SConvertI32x4High, 0xfdc8, s_s) \ + V(I64x2UConvertI32x4Low, 0xfdc9, s_s) \ + V(I64x2UConvertI32x4High, 0xfdca, s_s) \ + V(F32x4Qfms, 0xfdd4, s_sss) \ + V(F64x2Qfma, 0xfdfe, s_sss) \ + V(F64x2Qfms, 0xfdff, s_sss) \ + V(I16x8AddHoriz, 0xfdaf, s_ss) \ + V(I32x4AddHoriz, 0xfdb0, s_ss) \ + V(F32x4AddHoriz, 0xfdb2, s_ss) \ + V(F32x4RecipApprox, 0xfdb3, s_s) \ V(F32x4RecipSqrtApprox, 0xfdbc, s_s) #define FOREACH_SIMD_1_OPERAND_1_PARAM_OPCODE(V) \ diff --git a/test/cctest/wasm/test-run-wasm-simd.cc b/test/cctest/wasm/test-run-wasm-simd.cc index 7bae3f8852..d5983b655e 100644 --- a/test/cctest/wasm/test-run-wasm-simd.cc +++ b/test/cctest/wasm/test-run-wasm-simd.cc @@ -1876,6 +1876,57 @@ WASM_SIMD_TEST(S128Not) { [](int32_t x) { return ~x; }); } +#if V8_TARGET_ARCH_ARM64 +// TODO(v8:11086) Prototype i32x4.extadd_pairwise_i16x8_{s,u} +template +void RunExtAddPairwiseTest(TestExecutionTier execution_tier, + LowerSimd lower_simd, WasmOpcode ext_add_pairwise, + WasmOpcode splat) { + FLAG_SCOPE(wasm_simd_post_mvp); + constexpr int num_lanes = kSimd128Size / sizeof(Wide); + WasmRunner r(execution_tier, lower_simd); + Wide* g = r.builder().template 
AddGlobal(kWasmS128); + + // TODO(v8:11086) We splat the same value, so pairwise adding ends up adding + // the same value to itself, consider a more complicated test, like having 2 + // vectors, and shuffling them. + BUILD(r, WASM_GET_LOCAL(0), WASM_SIMD_OP(splat), + WASM_SIMD_OP(ext_add_pairwise), kExprGlobalSet, 0, WASM_ONE); + + for (Narrow x : compiler::ValueHelper::GetVector()) { + r.Call(x); + Wide expected = AddLong(x, x); + for (int i = 0; i < num_lanes; i++) { + CHECK_EQ(expected, ReadLittleEndianValue(&g[i])); + } + } +} + +WASM_SIMD_TEST_NO_LOWERING(I32x4ExtAddPairwiseI16x8S) { + RunExtAddPairwiseTest(execution_tier, lower_simd, + kExprI32x4ExtAddPairwiseI16x8S, + kExprI16x8Splat); +} + +WASM_SIMD_TEST_NO_LOWERING(I32x4ExtAddPairwiseI16x8U) { + RunExtAddPairwiseTest(execution_tier, lower_simd, + kExprI32x4ExtAddPairwiseI16x8U, + kExprI16x8Splat); +} + +WASM_SIMD_TEST_NO_LOWERING(I16x8ExtAddPairwiseI8x16S) { + RunExtAddPairwiseTest(execution_tier, lower_simd, + kExprI16x8ExtAddPairwiseI8x16S, + kExprI8x16Splat); +} + +WASM_SIMD_TEST_NO_LOWERING(I16x8ExtAddPairwiseI8x16U) { + RunExtAddPairwiseTest(execution_tier, lower_simd, + kExprI16x8ExtAddPairwiseI8x16U, + kExprI8x16Splat); +} +#endif // V8_TARGET_ARCH_ARM64 + void RunI32x4BinOpTest(TestExecutionTier execution_tier, LowerSimd lower_simd, WasmOpcode opcode, Int32BinOp expected_op) { WasmRunner r(execution_tier, lower_simd); diff --git a/test/common/wasm/wasm-interpreter.cc b/test/common/wasm/wasm-interpreter.cc index 5a948c5e3f..cdbcff8b41 100644 --- a/test/common/wasm/wasm-interpreter.cc +++ b/test/common/wasm/wasm-interpreter.cc @@ -2789,6 +2789,18 @@ class WasmInterpreterInternals { case kExprI64x2SignSelect: { return DoSimdSignSelect(); } + case kExprI32x4ExtAddPairwiseI16x8S: { + return DoSimdExtAddPairwise(); + } + case kExprI32x4ExtAddPairwiseI16x8U: { + return DoSimdExtAddPairwise(); + } + case kExprI16x8ExtAddPairwiseI8x16S: { + return DoSimdExtAddPairwise(); + } + case 
kExprI16x8ExtAddPairwiseI8x16U: { + return DoSimdExtAddPairwise(); + } default: return false; } @@ -2924,6 +2936,21 @@ class WasmInterpreterInternals { return true; } + template + bool DoSimdExtAddPairwise() { + constexpr int lanes = kSimd128Size / sizeof(DstSimdType::val[0]); + auto v = Pop().to_s128().to(); + DstSimdType res; + for (int i = 0; i < lanes; ++i) { + res.val[LANE(i, res)] = + AddLong(static_cast(v.val[LANE(i * 2, v)]), + static_cast(v.val[LANE(i * 2 + 1, v)])); + } + Push(WasmValue(Simd128(res))); + return true; + } + // Check if our control stack (frames_) exceeds the limit. Trigger stack // overflow if it does, and unwinding the current frame. // Returns true if execution can continue, false if the stack was fully