[wasm-relaxed-simd] Implement dot product instructions for ia32/x64
Reference lowering in the corresponding issue:
https://github.com/WebAssembly/relaxed-simd/issues/52

Bug: v8:12284
Change-Id: Ia59419f41ae1e53804b0fdb7169bf6f56f864c53
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3862956
Reviewed-by: Thibaud Michaud <thibaudm@chromium.org>
Commit-Queue: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/main@{#82923}
This commit is contained in:
parent
ac23cc0470
commit
f165b31004
@ -704,6 +704,21 @@ void SharedTurboAssembler::I16x8Q15MulRSatS(XMMRegister dst, XMMRegister src1,
|
||||
Pxor(dst, scratch);
|
||||
}
|
||||
|
||||
// Lowering for wasm i16x8.dot_i8x16_i7x16_s: multiplies adjacent byte pairs
// of src1 and src2 and sums each pair into a 16-bit lane of dst.
// pmaddubsw treats its first source operand as unsigned bytes and the second
// as signed bytes; src2 is the I7x16 operand (7-bit values, so signed and
// unsigned interpretations agree — see WebAssembly/relaxed-simd#52), which is
// why the operands are passed as (src2, src1).
void SharedTurboAssembler::I16x8DotI8x16I7x16S(XMMRegister dst,
                                               XMMRegister src1,
                                               XMMRegister src2) {
  ASM_CODE_COMMENT(this);
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope avx_scope(this, AVX);
    vpmaddubsw(dst, src2, src1);
  } else {
    // SSE pmaddubsw is destructive: dst doubles as the unsigned source, so
    // src2 must be moved into dst first. Callers must ensure dst != src1,
    // or the movdqa below would clobber src1.
    if (dst != src2) {
      movdqa(dst, src2);
    }
    pmaddubsw(dst, src1);
  }
}
|
||||
|
||||
void SharedTurboAssembler::I32x4ExtAddPairwiseI16x8U(XMMRegister dst,
|
||||
XMMRegister src,
|
||||
XMMRegister tmp) {
|
||||
|
@ -442,6 +442,7 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
|
||||
// Will move src1 to dst if AVX is not supported.
|
||||
void I16x8Q15MulRSatS(XMMRegister dst, XMMRegister src1, XMMRegister src2,
|
||||
XMMRegister scratch);
|
||||
void I16x8DotI8x16I7x16S(XMMRegister dst, XMMRegister src1, XMMRegister src2);
|
||||
void I32x4ExtAddPairwiseI16x8U(XMMRegister dst, XMMRegister src,
|
||||
XMMRegister tmp);
|
||||
// Requires that dst == src1 if AVX is not supported.
|
||||
|
@ -2089,6 +2089,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
i.InputSimd128Register(1));
|
||||
break;
|
||||
}
|
||||
case kIA32I16x8DotI8x16I7x16S: {
|
||||
__ I16x8DotI8x16I7x16S(i.OutputSimd128Register(),
|
||||
i.InputSimd128Register(0),
|
||||
i.InputSimd128Register(1));
|
||||
break;
|
||||
}
|
||||
case kIA32F32x4Splat: {
|
||||
__ F32x4Splat(i.OutputSimd128Register(), i.InputDoubleRegister(0));
|
||||
break;
|
||||
|
@ -359,6 +359,7 @@ namespace compiler {
|
||||
V(IA32I32x4AllTrue) \
|
||||
V(IA32I16x8AllTrue) \
|
||||
V(IA32I8x16AllTrue) \
|
||||
V(IA32I16x8DotI8x16I7x16S) \
|
||||
V(IA32Word32AtomicPairLoad) \
|
||||
V(IA32Word32ReleasePairStore) \
|
||||
V(IA32Word32SeqCstPairStore) \
|
||||
|
@ -244,6 +244,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kIA32I16x8ExtAddPairwiseI8x16U:
|
||||
case kIA32I16x8Q15MulRSatS:
|
||||
case kIA32I16x8RelaxedQ15MulRS:
|
||||
case kIA32I16x8DotI8x16I7x16S:
|
||||
case kIA32I8x16Splat:
|
||||
case kIA32I8x16ExtractLaneS:
|
||||
case kIA32Pinsrb:
|
||||
|
@ -3311,6 +3311,12 @@ void InstructionSelector::VisitF32x4Qfms(Node* node) {
|
||||
VisitRRRR(this, node, kIA32F32x4Qfms);
|
||||
}
|
||||
|
||||
// Instruction selection (ia32) for wasm i16x8.dot_i8x16_i7x16_s.
void InstructionSelector::VisitI16x8DotI8x16I7x16S(Node* node) {
  IA32OperandGenerator g(this);
  // Input 0 must not alias the output register: the non-AVX lowering copies
  // input 1 into the destination before multiplying, which would otherwise
  // clobber input 0.
  Emit(kIA32I16x8DotI8x16I7x16S, g.DefineAsRegister(node),
       g.UseUniqueRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
}
|
||||
|
||||
void InstructionSelector::AddOutputToSelectContinuation(OperandGenerator* g,
|
||||
int first_input_index,
|
||||
Node* node) {
|
||||
|
@ -2802,11 +2802,13 @@ void InstructionSelector::VisitF32x4Qfms(Node* node) { UNIMPLEMENTED(); }
|
||||
// && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_IA32 &&
|
||||
// !V8_TARGET_ARCH_RISCV64 && !V8_TARGET_ARCH_RISCV32
|
||||
|
||||
#if !V8_TARGET_ARCH_ARM64
|
||||
#if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
|
||||
void InstructionSelector::VisitI16x8DotI8x16I7x16S(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
#endif // !V8_TARGET_ARCH_ARM6 && !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
|
||||
|
||||
#if !V8_TARGET_ARCH_ARM64
|
||||
void InstructionSelector::VisitI32x4DotI8x16I7x16AddS(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
@ -3651,6 +3651,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
i.InputSimd128Register(1));
|
||||
break;
|
||||
}
|
||||
case kX64I16x8DotI8x16I7x16S: {
|
||||
__ I16x8DotI8x16I7x16S(i.OutputSimd128Register(),
|
||||
i.InputSimd128Register(0),
|
||||
i.InputSimd128Register(1));
|
||||
break;
|
||||
}
|
||||
case kX64I8x16Splat: {
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
if (HasRegisterInput(instr, 0)) {
|
||||
|
@ -335,6 +335,7 @@ namespace compiler {
|
||||
V(X64I16x8ExtAddPairwiseI8x16U) \
|
||||
V(X64I16x8Q15MulRSatS) \
|
||||
V(X64I16x8RelaxedQ15MulRS) \
|
||||
V(X64I16x8DotI8x16I7x16S) \
|
||||
V(X64I8x16Splat) \
|
||||
V(X64I8x16ExtractLaneS) \
|
||||
V(X64I8x16SConvertI16x8) \
|
||||
|
@ -280,6 +280,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kX64I16x8ExtAddPairwiseI8x16U:
|
||||
case kX64I16x8Q15MulRSatS:
|
||||
case kX64I16x8RelaxedQ15MulRS:
|
||||
case kX64I16x8DotI8x16I7x16S:
|
||||
case kX64I8x16Splat:
|
||||
case kX64I8x16ExtractLaneS:
|
||||
case kX64I8x16SConvertI16x8:
|
||||
|
@ -4320,6 +4320,12 @@ void InstructionSelector::VisitF64x2PromoteLowF32x4(Node* node) {
|
||||
VisitRR(this, node, code);
|
||||
}
|
||||
|
||||
// Instruction selection (x64) for wasm i16x8.dot_i8x16_i7x16_s.
void InstructionSelector::VisitI16x8DotI8x16I7x16S(Node* node) {
  X64OperandGenerator g(this);
  // Input 0 must not alias the output register: the non-AVX lowering copies
  // input 1 into the destination before multiplying, which would otherwise
  // clobber input 0.
  Emit(kX64I16x8DotI8x16I7x16S, g.DefineAsRegister(node),
       g.UseUniqueRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
}
|
||||
|
||||
void InstructionSelector::AddOutputToSelectContinuation(OperandGenerator* g,
|
||||
int first_input_index,
|
||||
Node* node) {
|
||||
|
@ -3670,7 +3670,7 @@ void LiftoffAssembler::emit_i16x8_relaxed_q15mulr_s(LiftoffRegister dst,
|
||||
// Liftoff lowering for wasm i16x8.dot_i8x16_i7x16_s. The previous
// bailout(kSimd, "emit_i16x8_dot_i8x16_i7x16_s") is removed: the operation is
// now emitted directly via the shared macro-assembler helper, so Liftoff no
// longer bails out for this instruction.
void LiftoffAssembler::emit_i16x8_dot_i8x16_i7x16_s(LiftoffRegister dst,
                                                    LiftoffRegister lhs,
                                                    LiftoffRegister rhs) {
  I16x8DotI8x16I7x16S(dst.fp(), lhs.fp(), rhs.fp());
}
|
||||
|
||||
void LiftoffAssembler::emit_i32x4_dot_i8x16_i7x16_add_s(LiftoffRegister dst,
|
||||
|
@ -3255,7 +3255,7 @@ void LiftoffAssembler::emit_i16x8_relaxed_q15mulr_s(LiftoffRegister dst,
|
||||
// Liftoff lowering for wasm i16x8.dot_i8x16_i7x16_s. The previous
// bailout(kSimd, "emit_i16x8_dot_i8x16_i7x16_s") is removed: the operation is
// now emitted directly via the shared macro-assembler helper, so Liftoff no
// longer bails out for this instruction.
void LiftoffAssembler::emit_i16x8_dot_i8x16_i7x16_s(LiftoffRegister dst,
                                                    LiftoffRegister lhs,
                                                    LiftoffRegister rhs) {
  I16x8DotI8x16I7x16S(dst.fp(), lhs.fp(), rhs.fp());
}
|
||||
|
||||
void LiftoffAssembler::emit_i32x4_dot_i8x16_i7x16_add_s(LiftoffRegister dst,
|
||||
|
@ -435,7 +435,7 @@ WASM_RELAXED_SIMD_TEST(I16x8RelaxedQ15MulRS) {
|
||||
}
|
||||
}
|
||||
|
||||
#if V8_TARGET_ARCH_ARM64
|
||||
#if V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
|
||||
WASM_RELAXED_SIMD_TEST(I16x8DotI8x16I7x16S) {
|
||||
WasmRunner<int32_t, int8_t, int8_t> r(execution_tier);
|
||||
int16_t* g = r.builder().template AddGlobal<int16_t>(kWasmS128);
|
||||
@ -460,7 +460,9 @@ WASM_RELAXED_SIMD_TEST(I16x8DotI8x16I7x16S) {
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
|
||||
|
||||
#if V8_TARGET_ARCH_ARM64
|
||||
WASM_RELAXED_SIMD_TEST(I32x4DotI8x16I7x16AddS) {
|
||||
WasmRunner<int32_t, int8_t, int8_t, int32_t> r(execution_tier);
|
||||
int32_t* g = r.builder().template AddGlobal<int32_t>(kWasmS128);
|
||||
|
Loading…
Reference in New Issue
Block a user