[wasm-simd][arm] Prototype extended multiply
Bug: v8:11008
Change-Id: Ic7be8370e3e820d225558995a9ad2295811e98a4
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2567531
Reviewed-by: Bill Budge <bbudge@chromium.org>
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#71629}
This commit is contained in:
parent
996aadbd17
commit
5ce5f42990
@ -1862,6 +1862,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ and_(i.OutputRegister(0), i.InputRegister(0),
|
||||
Operand(kSpeculationPoisonRegister));
|
||||
break;
|
||||
case kArmVmullLow: {
// Emits vmull on the low halves of both 128-bit inputs, writing the full
// 128-bit output register.
|
||||
// The NEON data type is carried in the MiscField bits of the instruction
// opcode and decoded here.
auto dt = static_cast<NeonDataType>(MiscField::decode(instr->opcode()));
|
||||
__ vmull(dt, i.OutputSimd128Register(), i.InputSimd128Register(0).low(),
|
||||
i.InputSimd128Register(1).low());
|
||||
break;
|
||||
}
|
||||
case kArmVmullHigh: {
// Emits vmull on the high halves of both 128-bit inputs, writing the full
// 128-bit output register.
|
||||
// The NEON data type is carried in the MiscField bits of the instruction
// opcode and decoded here.
auto dt = static_cast<NeonDataType>(MiscField::decode(instr->opcode()));
|
||||
__ vmull(dt, i.OutputSimd128Register(), i.InputSimd128Register(0).high(),
|
||||
i.InputSimd128Register(1).high());
|
||||
break;
|
||||
}
|
||||
case kArmF64x2Splat: {
|
||||
Simd128Register dst = i.OutputSimd128Register();
|
||||
DoubleRegister src = i.InputDoubleRegister(0);
|
||||
|
@ -75,6 +75,8 @@ namespace compiler {
|
||||
V(ArmVabsF64) \
|
||||
V(ArmVnegF64) \
|
||||
V(ArmVsqrtF64) \
|
||||
V(ArmVmullLow) \
|
||||
V(ArmVmullHigh) \
|
||||
V(ArmVrintmF32) \
|
||||
V(ArmVrintmF64) \
|
||||
V(ArmVrintpF32) \
|
||||
|
@ -76,6 +76,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kArmVabsF64:
|
||||
case kArmVnegF64:
|
||||
case kArmVsqrtF64:
|
||||
case kArmVmullLow:
|
||||
case kArmVmullHigh:
|
||||
case kArmVrintmF32:
|
||||
case kArmVrintmF64:
|
||||
case kArmVrintpF32:
|
||||
|
@ -86,7 +86,8 @@ void VisitRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
|
||||
g.UseRegister(node->InputAt(0)));
|
||||
}
|
||||
|
||||
void VisitRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
|
||||
void VisitRRR(InstructionSelector* selector, InstructionCode opcode,
|
||||
Node* node) {
|
||||
ArmOperandGenerator g(selector);
|
||||
selector->Emit(opcode, g.DefineAsRegister(node),
|
||||
g.UseRegister(node->InputAt(0)),
|
||||
@ -3096,6 +3097,30 @@ void InstructionSelector::VisitF64x2Pmax(Node* node) {
|
||||
VisitF64x2PminOrPMax(this, kArmF64x2Pmax, node);
|
||||
}
|
||||
|
||||
// X-macro table of the extended-multiply operations. Each entry is
// (operation name, ARM arch opcode, NEON data type): "Low" operations use
// kArmVmullLow, "High" operations use kArmVmullHigh, and the S/U suffix of the
// operation matches the NeonS*/NeonU* data type.
#define EXT_MUL_LIST(V) \
|
||||
V(I16x8ExtMulLowI8x16S, kArmVmullLow, NeonS8) \
|
||||
V(I16x8ExtMulHighI8x16S, kArmVmullHigh, NeonS8) \
|
||||
V(I16x8ExtMulLowI8x16U, kArmVmullLow, NeonU8) \
|
||||
V(I16x8ExtMulHighI8x16U, kArmVmullHigh, NeonU8) \
|
||||
V(I32x4ExtMulLowI16x8S, kArmVmullLow, NeonS16) \
|
||||
V(I32x4ExtMulHighI16x8S, kArmVmullHigh, NeonS16) \
|
||||
V(I32x4ExtMulLowI16x8U, kArmVmullLow, NeonU16) \
|
||||
V(I32x4ExtMulHighI16x8U, kArmVmullHigh, NeonU16) \
|
||||
V(I64x2ExtMulLowI32x4S, kArmVmullLow, NeonS32) \
|
||||
V(I64x2ExtMulHighI32x4S, kArmVmullHigh, NeonS32) \
|
||||
V(I64x2ExtMulLowI32x4U, kArmVmullLow, NeonU32) \
|
||||
V(I64x2ExtMulHighI32x4U, kArmVmullHigh, NeonU32)
|
||||
|
||||
// Defines InstructionSelector::Visit<OPCODE>: forwards to VisitRRR with the
// arch opcode OR-ed with the NEON data type encoded into MiscField.
#define VISIT_EXT_MUL(OPCODE, VMULL, NEONSIZE) \
|
||||
void InstructionSelector::Visit##OPCODE(Node* node) { \
|
||||
VisitRRR(this, VMULL | MiscField::encode(NEONSIZE), node); \
|
||||
}
|
||||
|
||||
// Instantiate one visitor per table entry.
EXT_MUL_LIST(VISIT_EXT_MUL)
|
||||
|
||||
// The helper macros are only needed for the expansion above.
#undef VISIT_EXT_MUL
|
||||
#undef EXT_MUL_LIST
|
||||
|
||||
void InstructionSelector::VisitTruncateFloat32ToInt32(Node* node) {
|
||||
ArmOperandGenerator g(this);
|
||||
|
||||
|
@ -2737,30 +2737,7 @@ void InstructionSelector::VisitI64x2Eq(Node* node) { UNIMPLEMENTED(); }
|
||||
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X && !V8_TARGET_ARCH_ARM64
|
||||
// && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM
|
||||
|
||||
#if !V8_TARGET_ARCH_ARM64
|
||||
// TODO(v8:10971) Prototype i16x8.q15mulr_sat_s
|
||||
void InstructionSelector::VisitI16x8Q15MulRSatS(Node* node) { UNIMPLEMENTED(); }
|
||||
|
||||
// TODO(v8:10972) Prototype i64x2 widen i32x4.
|
||||
void InstructionSelector::VisitI64x2SConvertI32x4Low(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitI64x2SConvertI32x4High(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitI64x2UConvertI32x4Low(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitI64x2UConvertI32x4High(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
// TODO(v8:11002) Prototype i8x16.popcnt.
|
||||
void InstructionSelector::VisitI8x16Popcnt(Node* node) { UNIMPLEMENTED(); }
|
||||
|
||||
#if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM
|
||||
// TODO(v8:11008) Prototype extended multiplication.
|
||||
void InstructionSelector::VisitI64x2ExtMulLowI32x4S(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
@ -2798,6 +2775,31 @@ void InstructionSelector::VisitI16x8ExtMulLowI8x16U(Node* node) {
|
||||
// Placeholder visitor whose body only invokes UNIMPLEMENTED(). The #endif that
// follows indicates it sits under !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM.
void InstructionSelector::VisitI16x8ExtMulHighI8x16U(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
#endif // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM
|
||||
|
||||
#if !V8_TARGET_ARCH_ARM64
|
||||
// Placeholder visitors compiled under the preceding #if !V8_TARGET_ARCH_ARM64
// guard. Each body only invokes UNIMPLEMENTED(); the TODOs name the tracking
// bugs for the prototypes.
// TODO(v8:10971) Prototype i16x8.q15mulr_sat_s
|
||||
void InstructionSelector::VisitI16x8Q15MulRSatS(Node* node) { UNIMPLEMENTED(); }
|
||||
|
||||
// TODO(v8:10972) Prototype i64x2 widen i32x4.
|
||||
// Signed conversion, low half (per the visitor name).
void InstructionSelector::VisitI64x2SConvertI32x4Low(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
// Signed conversion, high half (per the visitor name).
void InstructionSelector::VisitI64x2SConvertI32x4High(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
// Unsigned conversion, low half (per the visitor name).
void InstructionSelector::VisitI64x2UConvertI32x4Low(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
// Unsigned conversion, high half (per the visitor name).
void InstructionSelector::VisitI64x2UConvertI32x4High(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
// TODO(v8:11002) Prototype i8x16.popcnt.
|
||||
void InstructionSelector::VisitI8x16Popcnt(Node* node) { UNIMPLEMENTED(); }
|
||||
|
||||
// TODO(v8:11086) Prototype extended pairwise add.
|
||||
void InstructionSelector::VisitI32x4ExtAddPairwiseI16x8S(Node* node) {
|
||||
|
@ -4842,9 +4842,9 @@ void Simulator::DecodeAdvancedSIMDTwoOrThreeRegisters(Instruction* instr) {
|
||||
// Advanced SIMD three registers of different lengths.
|
||||
int u = instr->Bit(24);
|
||||
int opc = instr->Bits(11, 8);
|
||||
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
|
||||
if (opc == 0b1000) {
|
||||
// vmlal.u<size> Qd, Dn, Dm
|
||||
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
|
||||
if (size != Neon32) UNIMPLEMENTED();
|
||||
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
@ -4859,33 +4859,46 @@ void Simulator::DecodeAdvancedSIMDTwoOrThreeRegisters(Instruction* instr) {
|
||||
dst[1] += (src1 >> 32) * (src2 >> 32);
|
||||
set_neon_register<uint64_t>(Vd, dst);
|
||||
} else if (opc == 0b1100) {
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int Vn = instr->VFPNRegValue(kDoublePrecision);
|
||||
int Vm = instr->VFPMRegValue(kDoublePrecision);
|
||||
if (u) {
|
||||
// vmull.u<size> Qd, Dn, Dm
|
||||
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int Vn = instr->VFPNRegValue(kDoublePrecision);
|
||||
int Vm = instr->VFPMRegValue(kDoublePrecision);
|
||||
switch (size) {
|
||||
case Neon8: {
|
||||
MultiplyLong<uint8_t, uint16_t>(this, Vd, Vn, Vm);
|
||||
break;
|
||||
}
|
||||
case Neon16: {
|
||||
MultiplyLong<uint16_t, uint32_t>(this, Vd, Vn, Vm);
|
||||
break;
|
||||
}
|
||||
case Neon32: {
|
||||
MultiplyLong<uint32_t, uint64_t>(this, Vd, Vn, Vm);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
case Neon64: {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// vmull.s<size> Qd, Dn, Dm
|
||||
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int Vn = instr->VFPNRegValue(kDoublePrecision);
|
||||
int Vm = instr->VFPMRegValue(kDoublePrecision);
|
||||
switch (size) {
|
||||
case Neon8: {
|
||||
MultiplyLong<int8_t, int16_t>(this, Vd, Vn, Vm);
|
||||
break;
|
||||
}
|
||||
case Neon16: {
|
||||
MultiplyLong<int16_t, int32_t>(this, Vd, Vn, Vm);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
case Neon32: {
|
||||
MultiplyLong<int32_t, int64_t>(this, Vd, Vn, Vm);
|
||||
break;
|
||||
}
|
||||
case Neon64: {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1208,6 +1208,10 @@ TEST(Neon) {
|
||||
COMPARE(vmul(Neon32, q15, q0, q8),
|
||||
"f260e970 vmul.i32 q15, q0, q8");
|
||||
|
||||
COMPARE(vmull(NeonU8, q15, d0, d8),
|
||||
"f3c0ec08 vmull.u8 q15, d0, d8");
|
||||
COMPARE(vmull(NeonS16, q15, d0, d8),
|
||||
"f2d0ec08 vmull.s16 q15, d0, d8");
|
||||
COMPARE(vmull(NeonU32, q15, d0, d8),
|
||||
"f3e0ec08 vmull.u32 q15, d0, d8");
|
||||
COMPARE(vmlal(NeonU32, q15, d0, d8),
|
||||
|
@ -2329,7 +2329,9 @@ WASM_SIMD_TEST_NO_LOWERING(I16x8Q15MulRSatS) {
|
||||
RunI16x8BinOpTest<int16_t>(execution_tier, lower_simd, kExprI16x8Q15MulRSatS,
|
||||
SaturateRoundingQMul<int16_t>);
|
||||
}
|
||||
#endif // V8_TARGET_ARCH_ARM64
|
||||
|
||||
#if V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM
|
||||
// TODO(v8:11008) Prototype extended multiplication.
|
||||
namespace {
|
||||
enum class MulHalf { kLow, kHigh };
|
||||
@ -2441,7 +2443,7 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2ExtMulHighI32x4U) {
|
||||
kExprI64x2ExtMulHighI32x4U, MultiplyLong,
|
||||
kExprI32x4Splat, MulHalf::kHigh);
|
||||
}
|
||||
#endif // V8_TARGET_ARCH_ARM64
|
||||
#endif // V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM
|
||||
|
||||
WASM_SIMD_TEST(I32x4DotI16x8S) {
|
||||
WasmRunner<int32_t, int16_t, int16_t> r(execution_tier, lower_simd);
|
||||
|
Loading…
Reference in New Issue
Block a user