[wasm-simd][arm] Prototype i32x4.dot_i16x8_s
This implements I32x4DotI16x8S for arm.

Bug: v8:10583
Change-Id: I4541f4f5bc7daba03c6ab2040589893c8ef571bc
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2230787
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#68550}
This commit is contained in:
parent
1ae2636293
commit
91bf68ae70
@ -4338,7 +4338,6 @@ void Assembler::vmull(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src1,
|
||||
src2.split_code(&vm, &m);
|
||||
int size = NeonSz(dt);
|
||||
int u = NeonU(dt);
|
||||
if (!u) UNIMPLEMENTED();
|
||||
emit(0xFU * B28 | B25 | u * B24 | B23 | d * B22 | size * B20 | vn * B16 |
|
||||
vd * B12 | 0xC * B8 | n * B7 | m * B5 | vm);
|
||||
}
|
||||
|
@ -2434,6 +2434,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ VmovLow(dst, tmp2.low());
|
||||
break;
|
||||
}
|
||||
// Code generation for i32x4.dot_i16x8_s: multiply the 16-bit lanes of the two
// inputs into 32-bit products, then combine the products into dst.
case kArmI32x4DotI16x8S: {
|
||||
Simd128Register dst = i.OutputSimd128Register();
|
||||
Simd128Register lhs = i.InputSimd128Register(0);
|
||||
Simd128Register rhs = i.InputSimd128Register(1);
|
||||
// First (and only) SIMD temp attached to this instruction.
Simd128Register tmp1 = i.TempSimd128Register(0);
|
||||
// A second Q-sized temporary comes from the assembler's scratch scope.
UseScratchRegisterScope temps(tasm());
|
||||
Simd128Register scratch = temps.AcquireQ();
|
||||
// Signed 16-bit long multiply of the low D halves; products land in tmp1.
__ vmull(NeonS16, tmp1, lhs.low(), rhs.low());
|
||||
// Same for the high D halves; products land in scratch.
__ vmull(NeonS16, scratch, lhs.high(), rhs.high());
|
||||
// NOTE(review): presumably vpadd is the NEON pairwise add, so adjacent 32-bit
// products are summed into the low half of dst - confirm against
// Assembler::vpadd.
__ vpadd(Neon32, dst.low(), tmp1.low(), tmp1.high());
|
||||
// Likewise for the products of the high halves, written to dst's high half.
__ vpadd(Neon32, dst.high(), scratch.low(), scratch.high());
|
||||
break;
|
||||
}
|
||||
case kArmI16x8Splat: {
|
||||
__ vdup(Neon16, i.OutputSimd128Register(), i.InputRegister(0));
|
||||
break;
|
||||
|
@ -207,6 +207,7 @@ namespace compiler {
|
||||
V(ArmI32x4GeU) \
|
||||
V(ArmI32x4Abs) \
|
||||
V(ArmI32x4BitMask) \
|
||||
V(ArmI32x4DotI16x8S) \
|
||||
V(ArmI16x8Splat) \
|
||||
V(ArmI16x8ExtractLaneS) \
|
||||
V(ArmI16x8ReplaceLane) \
|
||||
|
@ -187,6 +187,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kArmI32x4GeU:
|
||||
case kArmI32x4Abs:
|
||||
case kArmI32x4BitMask:
|
||||
case kArmI32x4DotI16x8S:
|
||||
case kArmI16x8Splat:
|
||||
case kArmI16x8ExtractLaneS:
|
||||
case kArmI16x8ReplaceLane:
|
||||
|
@ -2633,6 +2633,14 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
|
||||
V(S128Xor, kArmS128Xor) \
|
||||
V(S128AndNot, kArmS128AndNot)
|
||||
|
||||
// Emits kArmI32x4DotI16x8S for |node|. Both inputs are requested through
// UseUniqueRegister, and one additional SIMD128 temp register is reserved for
// the instruction.
void InstructionSelector::VisitI32x4DotI16x8S(Node* node) {
  ArmOperandGenerator operand_gen(this);
  InstructionOperand scratch_regs[] = {operand_gen.TempSimd128Register()};

  const InstructionOperand result = operand_gen.DefineAsRegister(node);
  const InstructionOperand lhs =
      operand_gen.UseUniqueRegister(node->InputAt(0));
  const InstructionOperand rhs =
      operand_gen.UseUniqueRegister(node->InputAt(1));

  Emit(kArmI32x4DotI16x8S, result, lhs, rhs, arraysize(scratch_regs),
       scratch_regs);
}
|
||||
|
||||
void InstructionSelector::VisitS128Zero(Node* node) {
|
||||
ArmOperandGenerator g(this);
|
||||
Emit(kArmS128Zero, g.DefineAsRegister(node));
|
||||
|
@ -2705,10 +2705,12 @@ void InstructionSelector::VisitF64x2NearestInt(Node* node) { UNIMPLEMENTED(); }
|
||||
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_S390X
|
||||
// && !V8_TARGET_ARCH_IA32
|
||||
|
||||
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64
|
||||
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64 && \
|
||||
!V8_TARGET_ARCH_ARM
|
||||
// TODO(v8:10583) Prototype i32x4.dot_i16x8_s
|
||||
void InstructionSelector::VisitI32x4DotI16x8S(Node* node) {
  // Stub for architectures excluded by the surrounding preprocessor guard.
  UNIMPLEMENTED();
}
|
||||
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64
|
||||
// && !V8_TARGET_ARCH_ARM
|
||||
|
||||
void InstructionSelector::VisitFinishRegion(Node* node) {
  // Delegates entirely to EmitIdentity for this node.
  EmitIdentity(node);
}
|
||||
|
||||
|
@ -2065,6 +2065,16 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
|
||||
out_buffer_pos_ +=
|
||||
SNPrintF(out_buffer_ + out_buffer_pos_, "vshr.s%d q%d, q%d, #%d",
|
||||
size, Vd, Vm, shift);
|
||||
} else if (instr->Bits(11, 8) == 0xC && instr->Bit(6) == 0 &&
|
||||
instr->Bit(4) == 0) {
|
||||
// vmull.s<size> Qd, Dn, Dm
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int Vn = instr->VFPNRegValue(kDoublePrecision);
|
||||
int Vm = instr->VFPMRegValue(kDoublePrecision);
|
||||
int size = 8 << instr->Bits(21, 20);
|
||||
out_buffer_pos_ +=
|
||||
SNPrintF(out_buffer_ + out_buffer_pos_, "vmull.s%d q%d, d%d, d%d",
|
||||
size, Vd, Vn, Vm);
|
||||
} else {
|
||||
Unknown(instr);
|
||||
}
|
||||
|
@ -4241,6 +4241,28 @@ void RoundingAverageUnsigned(Simulator* simulator, int Vd, int Vm, int Vn) {
|
||||
simulator->set_neon_register<T, SIZE>(Vd, src1);
|
||||
}
|
||||
|
||||
template <typename NarrowType, typename WideType>
|
||||
void MultiplyLong(Simulator* simulator, int Vd, int Vn, int Vm) {
|
||||
DCHECK_EQ(sizeof(WideType), 2 * sizeof(NarrowType));
|
||||
static const int kElems = kSimd128Size / sizeof(WideType);
|
||||
NarrowType src1[kElems], src2[kElems];
|
||||
WideType dst[kElems];
|
||||
|
||||
// Get the entire d reg, then memcpy it to an array so we can address the
|
||||
// underlying datatype easily.
|
||||
uint64_t tmp;
|
||||
simulator->get_d_register(Vn, &tmp);
|
||||
memcpy(src1, &tmp, sizeof(tmp));
|
||||
simulator->get_d_register(Vm, &tmp);
|
||||
memcpy(src2, &tmp, sizeof(tmp));
|
||||
|
||||
for (int i = 0; i < kElems; i++) {
|
||||
dst[i] = WideType{src1[i]} * WideType{src2[i]};
|
||||
}
|
||||
|
||||
simulator->set_neon_register<WideType>(Vd, dst);
|
||||
}
|
||||
|
||||
void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
||||
switch (instr->SpecialValue()) {
|
||||
case 4: {
|
||||
@ -4686,6 +4708,21 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
||||
ArithmeticShiftRight<int64_t, kSimd128Size>(this, Vd, Vm, shift);
|
||||
break;
|
||||
}
|
||||
} else if (instr->Bits(11, 8) == 0xC && instr->Bit(6) == 0 &&
|
||||
instr->Bit(4) == 0) {
|
||||
// vmull.s<size> Qd, Dn, Dm
|
||||
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int Vn = instr->VFPNRegValue(kDoublePrecision);
|
||||
int Vm = instr->VFPMRegValue(kDoublePrecision);
|
||||
switch (size) {
|
||||
case Neon16: {
|
||||
MultiplyLong<int16_t, int32_t>(this, Vd, Vn, Vm);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
} else {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
@ -5579,18 +5616,17 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
||||
instr->Bit(4) == 0) {
|
||||
// vmull.u<size> Qd, Dn, Dm
|
||||
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
|
||||
if (size != Neon32) UNIMPLEMENTED();
|
||||
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int Vn = instr->VFPNRegValue(kDoublePrecision);
|
||||
int Vm = instr->VFPMRegValue(kDoublePrecision);
|
||||
uint64_t src1, src2, dst[2];
|
||||
|
||||
get_d_register(Vn, &src1);
|
||||
get_d_register(Vm, &src2);
|
||||
dst[0] = (src1 & 0xFFFFFFFFULL) * (src2 & 0xFFFFFFFFULL);
|
||||
dst[1] = (src1 >> 32) * (src2 >> 32);
|
||||
set_neon_register<uint64_t>(Vd, dst);
|
||||
switch (size) {
|
||||
case Neon32: {
|
||||
MultiplyLong<uint32_t, uint64_t>(this, Vd, Vn, Vm);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
} else {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
@ -2313,7 +2313,8 @@ WASM_SIMD_TEST(I16x8RoundingAverageU) {
|
||||
}
|
||||
|
||||
// TODO(v8:10583) Prototype i32x4.dot_i16x8_s
|
||||
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64
|
||||
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64 || \
|
||||
V8_TARGET_ARCH_ARM
|
||||
WASM_SIMD_TEST_NO_LOWERING(I32x4DotI16x8S) {
|
||||
FLAG_SCOPE(wasm_simd_post_mvp);
|
||||
|
||||
@ -2340,7 +2341,8 @@ WASM_SIMD_TEST_NO_LOWERING(I32x4DotI16x8S) {
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64
|
||||
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64 ||
|
||||
// V8_TARGET_ARCH_ARM
|
||||
|
||||
void RunI16x8ShiftOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
|
||||
WasmOpcode opcode, Int16ShiftOp expected_op) {
|
||||
|
Loading…
Reference in New Issue
Block a user