[wasm-simd][ia32] Prototype extended pairwise addition
Codegen is identical to x64. Tweaked a macro definition to do a dst == src1
check when AVX is not supported, and updated a single caller in LiftOff.

Bug: v8:11086
Change-Id: Ic9645f3d1bf1c26a1aa6db6bc2fa67fc991f8bbb
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2579928
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#71756}
parent bc4308f37b
commit d7de8fa4cb
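
For context, extended pairwise addition widens each pair of adjacent lanes and adds them; the codegen below realizes this with a multiply-add against a splat of 1 (pmaddwd for the signed i16x8 input, pmaddubsw for the i8x16 inputs). Below is a minimal scalar sketch of the signed i16x8 semantics; the helper name is illustrative and not part of this change.

// Scalar model of i32x4.extadd_pairwise_i16x8_s: each adjacent pair of
// 16-bit lanes is widened to 32 bits and summed, which is exactly what
// pmaddwd computes when one operand is an all-ones (splat(1)) vector.
#include <array>
#include <cstdint>

std::array<int32_t, 4> ExtAddPairwiseI16x8S(const std::array<int16_t, 8>& src) {
  std::array<int32_t, 4> dst;
  for (int i = 0; i < 4; ++i) {
    // dst[i] = src[2i] * 1 + src[2i + 1] * 1, with 32-bit intermediate results.
    dst[i] = int32_t{src[2 * i]} + int32_t{src[2 * i + 1]};
  }
  return dst;
}
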
@@ -505,15 +505,16 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
 #define AVX_OP3_WITH_TYPE_SCOPE(macro_name, name, dst_type, src_type, \
                                 sse_scope)                            \
-  void macro_name(dst_type dst, src_type src) {                       \
+  void macro_name(dst_type dst, dst_type src1, src_type src2) {       \
     if (CpuFeatures::IsSupported(AVX)) {                              \
       CpuFeatureScope scope(this, AVX);                               \
-      v##name(dst, dst, src);                                         \
+      v##name(dst, src1, src2);                                       \
       return;                                                         \
     }                                                                 \
     if (CpuFeatures::IsSupported(sse_scope)) {                        \
       CpuFeatureScope scope(this, sse_scope);                         \
-      name(dst, src);                                                 \
+      DCHECK_EQ(dst, src1);                                           \
+      name(dst, src2);                                                \
       return;                                                         \
     }                                                                 \
     UNREACHABLE();                                                    \
@@ -523,6 +524,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
   AVX_OP3_WITH_TYPE_SCOPE(macro_name, name, XMMRegister, Operand, SSE4_1)
 
   AVX_OP3_XO_SSE4(Pmaxsd, pmaxsd)
+  AVX_OP3_WITH_TYPE_SCOPE(Pmaddubsw, pmaddubsw, XMMRegister, XMMRegister, SSSE3)
 
 #undef AVX_OP3_XO_SSE4
 #undef AVX_OP3_WITH_TYPE_SCOPE
@@ -63,9 +63,10 @@
   V(pxor, 66, 0F, EF)
 
 #define SSSE3_INSTRUCTION_LIST(V) \
-  V(phaddd, 66, 0F, 38, 02)       \
-  V(phaddw, 66, 0F, 38, 01)       \
   V(pshufb, 66, 0F, 38, 00)       \
+  V(phaddw, 66, 0F, 38, 01)       \
+  V(phaddd, 66, 0F, 38, 02)       \
+  V(pmaddubsw, 66, 0F, 38, 04)    \
   V(psignb, 66, 0F, 38, 08)       \
   V(psignw, 66, 0F, 38, 09)       \
   V(psignd, 66, 0F, 38, 0A)
@@ -2323,6 +2323,54 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       }
       break;
     }
+    case kIA32I32x4ExtAddPairwiseI16x8S: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = i.InputSimd128Register(0);
+      // kScratchDoubleReg = i16x8.splat(1)
+      __ Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
+      __ Psrlw(kScratchDoubleReg, byte{15});
+      // pmaddwd multiplies signed words in kScratchDoubleReg and src, producing
+      // signed doublewords, then adds pairwise.
+      // src = |a|b|c|d|e|f|g|h|
+      // dst = | a*1 + b*1 | c*1 + d*1 | e*1 + f*1 | g*1 + h*1 |
+      __ Pmaddwd(dst, src, kScratchDoubleReg);
+      break;
+    }
+    case kIA32I32x4ExtAddPairwiseI16x8U: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = i.InputSimd128Register(0);
+
+      // src = |a|b|c|d|e|f|g|h|
+      // kScratchDoubleReg = i32x4.splat(0x0000FFFF)
+      __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+      __ Psrld(kScratchDoubleReg, kScratchDoubleReg, uint8_t{16});
+      // kScratchDoubleReg =|0|b|0|d|0|f|0|h|
+      __ Pand(kScratchDoubleReg, src);
+      // dst = |0|a|0|c|0|e|0|g|
+      __ Psrld(dst, src, byte{16});
+      // dst = |a+b|c+d|e+f|g+h|
+      __ Paddd(dst, src, kScratchDoubleReg);
+      break;
+    }
+    case kIA32I16x8ExtAddPairwiseI8x16S: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = i.InputSimd128Register(0);
+      DCHECK_NE(dst, src);
+      // dst = i8x16.splat(1)
+      __ Move(dst, uint32_t{0x01010101});
+      __ Pshufd(dst, dst, byte{0});
+      __ Pmaddubsw(dst, dst, src);
+      break;
+      break;
+    }
+    case kIA32I16x8ExtAddPairwiseI8x16U: {
+      XMMRegister dst = i.OutputSimd128Register();
+      // dst = i8x16.splat(1)
+      __ Move(kScratchDoubleReg, uint32_t{0x01010101});
+      __ Pshufd(kScratchDoubleReg, kScratchDoubleReg, byte{0});
+      __ Pmaddubsw(dst, i.InputSimd128Register(0), kScratchDoubleReg);
+      break;
+    }
     case kIA32I32x4SignSelect: {
       ASSEMBLE_SIMD_SIGN_SELECT(blendvps);
       break;
@@ -235,6 +235,8 @@ namespace compiler {
   V(IA32I32x4ExtMulHighI16x8S)      \
   V(IA32I32x4ExtMulLowI16x8U)       \
   V(IA32I32x4ExtMulHighI16x8U)      \
+  V(IA32I32x4ExtAddPairwiseI16x8S)  \
+  V(IA32I32x4ExtAddPairwiseI16x8U)  \
   V(IA32I16x8Splat)                 \
   V(IA32I16x8ExtractLaneS)          \
   V(IA32I16x8SConvertI8x16Low)      \
@@ -293,6 +295,8 @@ namespace compiler {
   V(IA32I16x8ExtMulHighI8x16S)      \
   V(IA32I16x8ExtMulLowI8x16U)       \
   V(IA32I16x8ExtMulHighI8x16U)      \
+  V(IA32I16x8ExtAddPairwiseI8x16S)  \
+  V(IA32I16x8ExtAddPairwiseI8x16U)  \
   V(IA32I8x16Splat)                 \
   V(IA32I8x16ExtractLaneS)          \
   V(IA32Pinsrb)                     \
@@ -217,6 +217,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kIA32I32x4ExtMulHighI16x8S:
     case kIA32I32x4ExtMulLowI16x8U:
     case kIA32I32x4ExtMulHighI16x8U:
+    case kIA32I32x4ExtAddPairwiseI16x8S:
+    case kIA32I32x4ExtAddPairwiseI16x8U:
    case kIA32I16x8Splat:
     case kIA32I16x8ExtractLaneS:
     case kIA32I16x8SConvertI8x16Low:
@@ -275,6 +277,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kIA32I16x8ExtMulHighI8x16S:
     case kIA32I16x8ExtMulLowI8x16U:
     case kIA32I16x8ExtMulHighI8x16U:
+    case kIA32I16x8ExtAddPairwiseI8x16S:
+    case kIA32I16x8ExtAddPairwiseI8x16U:
     case kIA32I8x16Splat:
     case kIA32I8x16ExtractLaneS:
     case kIA32Pinsrb:
@@ -2360,7 +2360,7 @@ void InstructionSelector::VisitF64x2Max(Node* node) {
 }
 
 void InstructionSelector::VisitF64x2Splat(Node* node) {
-  VisitRRSimd(this, node, kIA32F64x2Splat, kIA32F64x2Splat);
+  VisitRRSimd(this, node, kIA32F64x2Splat);
 }
 
 void InstructionSelector::VisitF64x2ExtractLane(Node* node) {
@@ -2427,7 +2427,7 @@ void InstructionSelector::VisitI64x2Mul(Node* node) {
 }
 
 void InstructionSelector::VisitF32x4Splat(Node* node) {
-  VisitRRSimd(this, node, kIA32F32x4Splat, kIA32F32x4Splat);
+  VisitRRSimd(this, node, kIA32F32x4Splat);
 }
 
 void InstructionSelector::VisitF32x4ExtractLane(Node* node) {
@@ -3083,6 +3083,24 @@ void InstructionSelector::VisitI64x2SignSelect(Node* node) {
   VisitSignSelect(this, node, kIA32I64x2SignSelect);
 }
 
+void InstructionSelector::VisitI32x4ExtAddPairwiseI16x8S(Node* node) {
+  VisitRRSimd(this, node, kIA32I32x4ExtAddPairwiseI16x8S);
+}
+
+void InstructionSelector::VisitI32x4ExtAddPairwiseI16x8U(Node* node) {
+  VisitRRSimd(this, node, kIA32I32x4ExtAddPairwiseI16x8U);
+}
+
+void InstructionSelector::VisitI16x8ExtAddPairwiseI8x16S(Node* node) {
+  IA32OperandGenerator g(this);
+  Emit(kIA32I16x8ExtAddPairwiseI8x16S, g.DefineAsRegister(node),
+       g.UseUniqueRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitI16x8ExtAddPairwiseI8x16U(Node* node) {
+  VisitRRSimd(this, node, kIA32I16x8ExtAddPairwiseI8x16U);
+}
+
 // static
 MachineOperatorBuilder::Flags
 InstructionSelector::SupportedMachineOperatorFlags() {
@@ -2814,7 +2814,8 @@ void InstructionSelector::VisitPrefetchNonTemporal(Node* node) {
 void InstructionSelector::VisitI8x16Popcnt(Node* node) { UNIMPLEMENTED(); }
 #endif  // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM
 
-#if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_X64
+#if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_X64 && \
+    !V8_TARGET_ARCH_IA32
 // TODO(v8:11086) Prototype extended pairwise add.
 void InstructionSelector::VisitI32x4ExtAddPairwiseI16x8S(Node* node) {
   UNIMPLEMENTED();
@@ -2829,6 +2830,7 @@ void InstructionSelector::VisitI16x8ExtAddPairwiseI8x16U(Node* node) {
   UNIMPLEMENTED();
 }
 #endif  // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_X64
+        // && !V8_TARGET_ARCH_IA32
 
 #if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64 && \
     !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS64
@@ -4112,7 +4112,7 @@ void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst,
   Cvttps2dq(tmp, tmp);
   Pxor(tmp, liftoff::kScratchDoubleReg);
   Pxor(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
-  Pmaxsd(tmp, liftoff::kScratchDoubleReg);
+  Pmaxsd(tmp, tmp, liftoff::kScratchDoubleReg);
   // Convert to int. Overflow lanes above max_signed will be 0x80000000.
   Cvttps2dq(dst.fp(), dst.fp());
   // Add (src-max_signed) for overflow lanes.
@@ -1884,7 +1884,8 @@ WASM_SIMD_TEST(S128Not) {
                 [](int32_t x) { return ~x; });
 }
 
-#if V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_X64
+#if V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_X64 || \
+    V8_TARGET_ARCH_IA32
 // TODO(v8:11086) Prototype i32x4.extadd_pairwise_i16x8_{s,u}
 template <typename Narrow, typename Wide>
 void RunExtAddPairwiseTest(TestExecutionTier execution_tier,
@@ -1933,7 +1934,8 @@ WASM_SIMD_TEST_NO_LOWERING(I16x8ExtAddPairwiseI8x16U) {
                                                kExprI16x8ExtAddPairwiseI8x16U,
                                                kExprI8x16Splat);
 }
-#endif  // V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_X64
+#endif  // V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_X64 ||
+        // V8_TARGET_ARCH_IA32
 
 void RunI32x4BinOpTest(TestExecutionTier execution_tier, LowerSimd lower_simd,
                        WasmOpcode opcode, Int32BinOp expected_op) {