[wasm-simd][ia32] Prototype i32x4.dot_i16x8_s

This implements I32x4DotI16x8S for ia32.

Also fixes instruction-selector for SIMD ops, they should all set operand1 to be a register, since we do not have memory alignment yet.

Bug: v8:10583
Change-Id: Id273816efd5eea128580f3f7bde533a8e1b2435d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2231031
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/master@{#68444}
This commit is contained in:
Ng Zhi An 2020-06-18 16:45:24 -07:00 committed by Commit Bot
parent 1c39569e2e
commit 08ccfb2002
8 changed files with 37 additions and 10 deletions

View File

@ -385,6 +385,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_PACKED_OP3(Psrlq, psrlq)
AVX_PACKED_OP3(Psraw, psraw)
AVX_PACKED_OP3(Psrad, psrad)
AVX_PACKED_OP3(Pmaddwd, pmaddwd)
AVX_PACKED_OP3(Paddd, paddd)
AVX_PACKED_OP3(Paddq, paddq)
AVX_PACKED_OP3(Psubq, psubq)

View File

@ -9,6 +9,7 @@
V(packsswb, 66, 0F, 63) \
V(packssdw, 66, 0F, 6B) \
V(packuswb, 66, 0F, 67) \
V(pmaddwd, 66, 0F, F5) \
V(paddb, 66, 0F, FC) \
V(paddw, 66, 0F, FD) \
V(paddd, 66, 0F, FE) \

View File

@ -2807,6 +2807,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Movmskps(i.OutputRegister(), i.InputSimd128Register(0));
break;
}
case kIA32I32x4DotI16x8S: {
__ Pmaddwd(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kIA32I16x8Splat: {
XMMRegister dst = i.OutputSimd128Register();
__ Movd(dst, i.InputOperand(0));

View File

@ -234,6 +234,7 @@ namespace compiler {
V(AVXI32x4GeU) \
V(IA32I32x4Abs) \
V(IA32I32x4BitMask) \
V(IA32I32x4DotI16x8S) \
V(IA32I16x8Splat) \
V(IA32I16x8ExtractLaneU) \
V(IA32I16x8ExtractLaneS) \

View File

@ -215,6 +215,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXI32x4GeU:
case kIA32I32x4Abs:
case kIA32I32x4BitMask:
case kIA32I32x4DotI16x8S:
case kIA32I16x8Splat:
case kIA32I16x8ExtractLaneU:
case kIA32I16x8ExtractLaneS:

View File

@ -277,6 +277,23 @@ void VisitRRSimd(InstructionSelector* selector, Node* node,
}
}
// TODO(v8:9198): Like VisitRROFloat, but for SIMD. SSE requires operand1 to be
// a register as we don't have memory alignment yet. For AVX, memory operands
// are fine, but can have performance issues if not aligned to 16/32 bytes
// (based on load size), see SDM Vol 1, chapter 14.9
void VisitRROSimd(InstructionSelector* selector, Node* node,
ArchOpcode avx_opcode, ArchOpcode sse_opcode) {
IA32OperandGenerator g(selector);
InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
if (selector->IsSupported(AVX)) {
selector->Emit(avx_opcode, g.DefineAsRegister(node), operand0,
g.Use(node->InputAt(1)));
} else {
selector->Emit(sse_opcode, g.DefineSameAsFirst(node), operand0,
g.UseRegister(node->InputAt(1)));
}
}
void VisitRRISimd(InstructionSelector* selector, Node* node,
ArchOpcode opcode) {
IA32OperandGenerator g(selector);
@ -2109,6 +2126,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
#define SIMD_BINOP_UNIFIED_SSE_AVX_LIST(V) \
V(I64x2Add) \
V(I64x2Sub) \
V(I32x4DotI16x8S) \
V(I16x8RoundingAverageU) \
V(I8x16RoundingAverageU)
@ -2422,17 +2440,17 @@ SIMD_ALLTRUE_LIST(VISIT_SIMD_ALLTRUE)
#undef VISIT_SIMD_ALLTRUE
#undef SIMD_ALLTRUE_LIST
#define VISIT_SIMD_BINOP(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \
VisitRROFloat(this, node, kAVX##Opcode, kSSE##Opcode); \
#define VISIT_SIMD_BINOP(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \
VisitRROSimd(this, node, kAVX##Opcode, kSSE##Opcode); \
}
SIMD_BINOP_LIST(VISIT_SIMD_BINOP)
#undef VISIT_SIMD_BINOP
#undef SIMD_BINOP_LIST
#define VISIT_SIMD_BINOP_UNIFIED_SSE_AVX(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \
VisitRROFloat(this, node, kIA32##Opcode, kIA32##Opcode); \
#define VISIT_SIMD_BINOP_UNIFIED_SSE_AVX(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \
VisitRROSimd(this, node, kIA32##Opcode, kIA32##Opcode); \
}
SIMD_BINOP_UNIFIED_SSE_AVX_LIST(VISIT_SIMD_BINOP_UNIFIED_SSE_AVX)
#undef VISIT_SIMD_BINOP_UNIFIED_SSE_AVX

View File

@ -2705,10 +2705,10 @@ void InstructionSelector::VisitF32x4NearestInt(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_S390X
// && !V8_TARGET_ARCH_IA32
#if !V8_TARGET_ARCH_X64
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
// TODO(v8:10583) Prototype i32x4.dot_i16x8_s
void InstructionSelector::VisitI32x4DotI16x8S(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }

View File

@ -2317,7 +2317,7 @@ WASM_SIMD_TEST(I16x8RoundingAverageU) {
}
// TODO(v8:10583) Prototype i32x4.dot_i16x8_s
#if V8_TARGET_ARCH_X64
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST_NO_LOWERING(I32x4DotI16x8S) {
FLAG_SCOPE(wasm_simd_post_mvp);
@ -2344,7 +2344,7 @@ WASM_SIMD_TEST_NO_LOWERING(I32x4DotI16x8S) {
}
}
}
#endif // V8_TARGET_ARCH_X64
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
void RunI16x8ShiftOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, Int16ShiftOp expected_op) {