[wasm-simd][ia32] Prototype i32x4.dot_i16x8_s
This implements I32x4DotI16x8S for ia32. It also fixes the instruction selector for SIMD ops: they should all set operand1 to be a register, since we do not have memory alignment yet. Bug: v8:10583 Change-Id: Id273816efd5eea128580f3f7bde533a8e1b2435d Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2231031 Commit-Queue: Zhi An Ng <zhin@chromium.org> Reviewed-by: Deepti Gandluri <gdeepti@chromium.org> Cr-Commit-Position: refs/heads/master@{#68444}
This commit is contained in:
parent
1c39569e2e
commit
08ccfb2002
@ -385,6 +385,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
|||||||
AVX_PACKED_OP3(Psrlq, psrlq)
|
AVX_PACKED_OP3(Psrlq, psrlq)
|
||||||
AVX_PACKED_OP3(Psraw, psraw)
|
AVX_PACKED_OP3(Psraw, psraw)
|
||||||
AVX_PACKED_OP3(Psrad, psrad)
|
AVX_PACKED_OP3(Psrad, psrad)
|
||||||
|
AVX_PACKED_OP3(Pmaddwd, pmaddwd)
|
||||||
AVX_PACKED_OP3(Paddd, paddd)
|
AVX_PACKED_OP3(Paddd, paddd)
|
||||||
AVX_PACKED_OP3(Paddq, paddq)
|
AVX_PACKED_OP3(Paddq, paddq)
|
||||||
AVX_PACKED_OP3(Psubq, psubq)
|
AVX_PACKED_OP3(Psubq, psubq)
|
||||||
|
@ -9,6 +9,7 @@
|
|||||||
V(packsswb, 66, 0F, 63) \
|
V(packsswb, 66, 0F, 63) \
|
||||||
V(packssdw, 66, 0F, 6B) \
|
V(packssdw, 66, 0F, 6B) \
|
||||||
V(packuswb, 66, 0F, 67) \
|
V(packuswb, 66, 0F, 67) \
|
||||||
|
V(pmaddwd, 66, 0F, F5) \
|
||||||
V(paddb, 66, 0F, FC) \
|
V(paddb, 66, 0F, FC) \
|
||||||
V(paddw, 66, 0F, FD) \
|
V(paddw, 66, 0F, FD) \
|
||||||
V(paddd, 66, 0F, FE) \
|
V(paddd, 66, 0F, FE) \
|
||||||
|
@ -2807,6 +2807,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
|||||||
__ Movmskps(i.OutputRegister(), i.InputSimd128Register(0));
|
__ Movmskps(i.OutputRegister(), i.InputSimd128Register(0));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case kIA32I32x4DotI16x8S: {
|
||||||
|
__ Pmaddwd(i.OutputSimd128Register(), i.InputSimd128Register(0),
|
||||||
|
i.InputSimd128Register(1));
|
||||||
|
break;
|
||||||
|
}
|
||||||
case kIA32I16x8Splat: {
|
case kIA32I16x8Splat: {
|
||||||
XMMRegister dst = i.OutputSimd128Register();
|
XMMRegister dst = i.OutputSimd128Register();
|
||||||
__ Movd(dst, i.InputOperand(0));
|
__ Movd(dst, i.InputOperand(0));
|
||||||
|
@ -234,6 +234,7 @@ namespace compiler {
|
|||||||
V(AVXI32x4GeU) \
|
V(AVXI32x4GeU) \
|
||||||
V(IA32I32x4Abs) \
|
V(IA32I32x4Abs) \
|
||||||
V(IA32I32x4BitMask) \
|
V(IA32I32x4BitMask) \
|
||||||
|
V(IA32I32x4DotI16x8S) \
|
||||||
V(IA32I16x8Splat) \
|
V(IA32I16x8Splat) \
|
||||||
V(IA32I16x8ExtractLaneU) \
|
V(IA32I16x8ExtractLaneU) \
|
||||||
V(IA32I16x8ExtractLaneS) \
|
V(IA32I16x8ExtractLaneS) \
|
||||||
|
@ -215,6 +215,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
|||||||
case kAVXI32x4GeU:
|
case kAVXI32x4GeU:
|
||||||
case kIA32I32x4Abs:
|
case kIA32I32x4Abs:
|
||||||
case kIA32I32x4BitMask:
|
case kIA32I32x4BitMask:
|
||||||
|
case kIA32I32x4DotI16x8S:
|
||||||
case kIA32I16x8Splat:
|
case kIA32I16x8Splat:
|
||||||
case kIA32I16x8ExtractLaneU:
|
case kIA32I16x8ExtractLaneU:
|
||||||
case kIA32I16x8ExtractLaneS:
|
case kIA32I16x8ExtractLaneS:
|
||||||
|
@ -277,6 +277,23 @@ void VisitRRSimd(InstructionSelector* selector, Node* node,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO(v8:9198): Like VisitRROFloat, but for SIMD. SSE requires operand1 to be
|
||||||
|
// a register as we don't have memory alignment yet. For AVX, memory operands
|
||||||
|
// are fine, but can have performance issues if not aligned to 16/32 bytes
|
||||||
|
// (based on load size), see SDM Vol 1, chapter 14.9
|
||||||
|
// Emits one two-input SIMD instruction for |node|: |avx_opcode| when the
// selector reports AVX support, |sse_opcode| otherwise. Input 0 is always
// requested in a register; the constraints on the output and on input 1
// differ per path (see below).
void VisitRROSimd(InstructionSelector* selector, Node* node,
|
||||||
|
ArchOpcode avx_opcode, ArchOpcode sse_opcode) {
|
||||||
|
IA32OperandGenerator g(selector);
|
||||||
|
// Shared by both paths: the first input must live in a register.
InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
|
||||||
|
if (selector->IsSupported(AVX)) {
|
||||||
|
// AVX path: the result is defined as its own register and input 1 is
// passed through g.Use (presumably permitting a memory operand -- confirm
// against IA32OperandGenerator).
selector->Emit(avx_opcode, g.DefineAsRegister(node), operand0,
|
||||||
|
g.Use(node->InputAt(1)));
|
||||||
|
} else {
|
||||||
|
// SSE path: the result is constrained to be the same as the first input,
// and input 1 is also requested in a register rather than via g.Use.
selector->Emit(sse_opcode, g.DefineSameAsFirst(node), operand0,
|
||||||
|
g.UseRegister(node->InputAt(1)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void VisitRRISimd(InstructionSelector* selector, Node* node,
|
void VisitRRISimd(InstructionSelector* selector, Node* node,
|
||||||
ArchOpcode opcode) {
|
ArchOpcode opcode) {
|
||||||
IA32OperandGenerator g(selector);
|
IA32OperandGenerator g(selector);
|
||||||
@ -2109,6 +2126,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
|
|||||||
#define SIMD_BINOP_UNIFIED_SSE_AVX_LIST(V) \
|
#define SIMD_BINOP_UNIFIED_SSE_AVX_LIST(V) \
|
||||||
V(I64x2Add) \
|
V(I64x2Add) \
|
||||||
V(I64x2Sub) \
|
V(I64x2Sub) \
|
||||||
|
V(I32x4DotI16x8S) \
|
||||||
V(I16x8RoundingAverageU) \
|
V(I16x8RoundingAverageU) \
|
||||||
V(I8x16RoundingAverageU)
|
V(I8x16RoundingAverageU)
|
||||||
|
|
||||||
@ -2422,17 +2440,17 @@ SIMD_ALLTRUE_LIST(VISIT_SIMD_ALLTRUE)
|
|||||||
#undef VISIT_SIMD_ALLTRUE
|
#undef VISIT_SIMD_ALLTRUE
|
||||||
#undef SIMD_ALLTRUE_LIST
|
#undef SIMD_ALLTRUE_LIST
|
||||||
|
|
||||||
#define VISIT_SIMD_BINOP(Opcode) \
|
#define VISIT_SIMD_BINOP(Opcode) \
|
||||||
void InstructionSelector::Visit##Opcode(Node* node) { \
|
void InstructionSelector::Visit##Opcode(Node* node) { \
|
||||||
VisitRROFloat(this, node, kAVX##Opcode, kSSE##Opcode); \
|
VisitRROSimd(this, node, kAVX##Opcode, kSSE##Opcode); \
|
||||||
}
|
}
|
||||||
SIMD_BINOP_LIST(VISIT_SIMD_BINOP)
|
SIMD_BINOP_LIST(VISIT_SIMD_BINOP)
|
||||||
#undef VISIT_SIMD_BINOP
|
#undef VISIT_SIMD_BINOP
|
||||||
#undef SIMD_BINOP_LIST
|
#undef SIMD_BINOP_LIST
|
||||||
|
|
||||||
#define VISIT_SIMD_BINOP_UNIFIED_SSE_AVX(Opcode) \
|
#define VISIT_SIMD_BINOP_UNIFIED_SSE_AVX(Opcode) \
|
||||||
void InstructionSelector::Visit##Opcode(Node* node) { \
|
void InstructionSelector::Visit##Opcode(Node* node) { \
|
||||||
VisitRROFloat(this, node, kIA32##Opcode, kIA32##Opcode); \
|
VisitRROSimd(this, node, kIA32##Opcode, kIA32##Opcode); \
|
||||||
}
|
}
|
||||||
SIMD_BINOP_UNIFIED_SSE_AVX_LIST(VISIT_SIMD_BINOP_UNIFIED_SSE_AVX)
|
SIMD_BINOP_UNIFIED_SSE_AVX_LIST(VISIT_SIMD_BINOP_UNIFIED_SSE_AVX)
|
||||||
#undef VISIT_SIMD_BINOP_UNIFIED_SSE_AVX
|
#undef VISIT_SIMD_BINOP_UNIFIED_SSE_AVX
|
||||||
|
@ -2705,10 +2705,10 @@ void InstructionSelector::VisitF32x4NearestInt(Node* node) { UNIMPLEMENTED(); }
|
|||||||
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_S390X
|
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_S390X
|
||||||
// && !V8_TARGET_ARCH_IA32
|
// && !V8_TARGET_ARCH_IA32
|
||||||
|
|
||||||
#if !V8_TARGET_ARCH_X64
|
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
|
||||||
// TODO(v8:10583) Prototype i32x4.dot_i16x8_s
|
// TODO(v8:10583) Prototype i32x4.dot_i16x8_s
|
||||||
void InstructionSelector::VisitI32x4DotI16x8S(Node* node) { UNIMPLEMENTED(); }
|
void InstructionSelector::VisitI32x4DotI16x8S(Node* node) { UNIMPLEMENTED(); }
|
||||||
#endif // !V8_TARGET_ARCH_X64
|
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
|
||||||
|
|
||||||
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
|
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
|
||||||
|
|
||||||
|
@ -2317,7 +2317,7 @@ WASM_SIMD_TEST(I16x8RoundingAverageU) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// TODO(v8:10583) Prototype i32x4.dot_i16x8_s
|
// TODO(v8:10583) Prototype i32x4.dot_i16x8_s
|
||||||
#if V8_TARGET_ARCH_X64
|
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
|
||||||
WASM_SIMD_TEST_NO_LOWERING(I32x4DotI16x8S) {
|
WASM_SIMD_TEST_NO_LOWERING(I32x4DotI16x8S) {
|
||||||
FLAG_SCOPE(wasm_simd_post_mvp);
|
FLAG_SCOPE(wasm_simd_post_mvp);
|
||||||
|
|
||||||
@ -2344,7 +2344,7 @@ WASM_SIMD_TEST_NO_LOWERING(I32x4DotI16x8S) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif // V8_TARGET_ARCH_X64
|
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
|
||||||
|
|
||||||
void RunI16x8ShiftOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
|
void RunI16x8ShiftOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
|
||||||
WasmOpcode opcode, Int16ShiftOp expected_op) {
|
WasmOpcode opcode, Int16ShiftOp expected_op) {
|
||||||
|
Loading…
Reference in New Issue
Block a user