[wasm-simd][ia32] Prototype i32x4.dot_i16x8_s
This implements I32x4DotI16x8S for ia32. It also fixes the instruction selector for SIMD ops: they should all set operand1 to be a register, since we do not have memory alignment yet. Bug: v8:10583 Change-Id: Id273816efd5eea128580f3f7bde533a8e1b2435d Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2231031 Commit-Queue: Zhi An Ng <zhin@chromium.org> Reviewed-by: Deepti Gandluri <gdeepti@chromium.org> Cr-Commit-Position: refs/heads/master@{#68444}
This commit is contained in:
parent
1c39569e2e
commit
08ccfb2002
@ -385,6 +385,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
AVX_PACKED_OP3(Psrlq, psrlq)
|
||||
AVX_PACKED_OP3(Psraw, psraw)
|
||||
AVX_PACKED_OP3(Psrad, psrad)
|
||||
AVX_PACKED_OP3(Pmaddwd, pmaddwd)
|
||||
AVX_PACKED_OP3(Paddd, paddd)
|
||||
AVX_PACKED_OP3(Paddq, paddq)
|
||||
AVX_PACKED_OP3(Psubq, psubq)
|
||||
|
@ -9,6 +9,7 @@
|
||||
V(packsswb, 66, 0F, 63) \
|
||||
V(packssdw, 66, 0F, 6B) \
|
||||
V(packuswb, 66, 0F, 67) \
|
||||
V(pmaddwd, 66, 0F, F5) \
|
||||
V(paddb, 66, 0F, FC) \
|
||||
V(paddw, 66, 0F, FD) \
|
||||
V(paddd, 66, 0F, FE) \
|
||||
|
@ -2807,6 +2807,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ Movmskps(i.OutputRegister(), i.InputSimd128Register(0));
|
||||
break;
|
||||
}
|
||||
case kIA32I32x4DotI16x8S: {
|
||||
__ Pmaddwd(i.OutputSimd128Register(), i.InputSimd128Register(0),
|
||||
i.InputSimd128Register(1));
|
||||
break;
|
||||
}
|
||||
case kIA32I16x8Splat: {
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
__ Movd(dst, i.InputOperand(0));
|
||||
|
@ -234,6 +234,7 @@ namespace compiler {
|
||||
V(AVXI32x4GeU) \
|
||||
V(IA32I32x4Abs) \
|
||||
V(IA32I32x4BitMask) \
|
||||
V(IA32I32x4DotI16x8S) \
|
||||
V(IA32I16x8Splat) \
|
||||
V(IA32I16x8ExtractLaneU) \
|
||||
V(IA32I16x8ExtractLaneS) \
|
||||
|
@ -215,6 +215,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kAVXI32x4GeU:
|
||||
case kIA32I32x4Abs:
|
||||
case kIA32I32x4BitMask:
|
||||
case kIA32I32x4DotI16x8S:
|
||||
case kIA32I16x8Splat:
|
||||
case kIA32I16x8ExtractLaneU:
|
||||
case kIA32I16x8ExtractLaneS:
|
||||
|
@ -277,6 +277,23 @@ void VisitRRSimd(InstructionSelector* selector, Node* node,
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(v8:9198): Like VisitRROFloat, but for SIMD. SSE requires operand1 to be
|
||||
// a register as we don't have memory alignment yet. For AVX, memory operands
|
||||
// are fine, but can have performance issues if not aligned to 16/32 bytes
|
||||
// (based on load size), see SDM Vol 1, chapter 14.9
|
||||
// Emits one instruction for the two-input SIMD `node`: `avx_opcode` when the
// selector reports AVX support, `sse_opcode` otherwise.
void VisitRROSimd(InstructionSelector* selector, Node* node,
|
||||
ArchOpcode avx_opcode, ArchOpcode sse_opcode) {
|
||||
IA32OperandGenerator g(selector);
|
||||
// Input 0 is requested via UseRegister on both paths.
InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
|
||||
if (selector->IsSupported(AVX)) {
|
||||
// AVX path: the result is defined with DefineAsRegister and input 1 is
// passed through g.Use rather than g.UseRegister.
// NOTE(review): presumably g.Use can yield a non-register operand; the
// kIA32I32x4DotI16x8S code-generator case reads input 1 with
// InputSimd128Register(1) -- confirm the two agree.
selector->Emit(avx_opcode, g.DefineAsRegister(node), operand0,
|
||||
g.Use(node->InputAt(1)));
|
||||
} else {
|
||||
// SSE path: the result is defined with DefineSameAsFirst (tied to input 0)
// and input 1 is requested via UseRegister.
selector->Emit(sse_opcode, g.DefineSameAsFirst(node), operand0,
|
||||
g.UseRegister(node->InputAt(1)));
|
||||
}
|
||||
}
|
||||
|
||||
void VisitRRISimd(InstructionSelector* selector, Node* node,
|
||||
ArchOpcode opcode) {
|
||||
IA32OperandGenerator g(selector);
|
||||
@ -2109,6 +2126,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
|
||||
#define SIMD_BINOP_UNIFIED_SSE_AVX_LIST(V) \
|
||||
V(I64x2Add) \
|
||||
V(I64x2Sub) \
|
||||
V(I32x4DotI16x8S) \
|
||||
V(I16x8RoundingAverageU) \
|
||||
V(I8x16RoundingAverageU)
|
||||
|
||||
@ -2424,7 +2442,7 @@ SIMD_ALLTRUE_LIST(VISIT_SIMD_ALLTRUE)
|
||||
|
||||
#define VISIT_SIMD_BINOP(Opcode) \
|
||||
void InstructionSelector::Visit##Opcode(Node* node) { \
|
||||
VisitRROFloat(this, node, kAVX##Opcode, kSSE##Opcode); \
|
||||
VisitRROSimd(this, node, kAVX##Opcode, kSSE##Opcode); \
|
||||
}
|
||||
SIMD_BINOP_LIST(VISIT_SIMD_BINOP)
|
||||
#undef VISIT_SIMD_BINOP
|
||||
@ -2432,7 +2450,7 @@ SIMD_BINOP_LIST(VISIT_SIMD_BINOP)
|
||||
|
||||
#define VISIT_SIMD_BINOP_UNIFIED_SSE_AVX(Opcode) \
|
||||
void InstructionSelector::Visit##Opcode(Node* node) { \
|
||||
VisitRROFloat(this, node, kIA32##Opcode, kIA32##Opcode); \
|
||||
VisitRROSimd(this, node, kIA32##Opcode, kIA32##Opcode); \
|
||||
}
|
||||
SIMD_BINOP_UNIFIED_SSE_AVX_LIST(VISIT_SIMD_BINOP_UNIFIED_SSE_AVX)
|
||||
#undef VISIT_SIMD_BINOP_UNIFIED_SSE_AVX
|
||||
|
@ -2705,10 +2705,10 @@ void InstructionSelector::VisitF32x4NearestInt(Node* node) { UNIMPLEMENTED(); }
|
||||
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_S390X
|
||||
// && !V8_TARGET_ARCH_IA32
|
||||
|
||||
#if !V8_TARGET_ARCH_X64
|
||||
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
|
||||
// TODO(v8:10583) Prototype i32x4.dot_i16x8_s
|
||||
void InstructionSelector::VisitI32x4DotI16x8S(Node* node) { UNIMPLEMENTED(); }
|
||||
#endif // !V8_TARGET_ARCH_X64
|
||||
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
|
||||
|
||||
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
|
||||
|
||||
|
@ -2317,7 +2317,7 @@ WASM_SIMD_TEST(I16x8RoundingAverageU) {
|
||||
}
|
||||
|
||||
// TODO(v8:10583) Prototype i32x4.dot_i16x8_s
|
||||
#if V8_TARGET_ARCH_X64
|
||||
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
|
||||
WASM_SIMD_TEST_NO_LOWERING(I32x4DotI16x8S) {
|
||||
FLAG_SCOPE(wasm_simd_post_mvp);
|
||||
|
||||
@ -2344,7 +2344,7 @@ WASM_SIMD_TEST_NO_LOWERING(I32x4DotI16x8S) {
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // V8_TARGET_ARCH_X64
|
||||
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
|
||||
|
||||
void RunI16x8ShiftOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
|
||||
WasmOpcode opcode, Int16ShiftOp expected_op) {
|
||||
|
Loading…
Reference in New Issue
Block a user