Reland "[wasm-simd][arm] Prototype i8x16.popcnt"
This is a reland of e2aa734aef
The original change was speculatively reverted in
https://crrev.com/c/2568925, but it appears to be unrelated to the flaky
test failures (which occur on x64).
Original change's description:
> [wasm-simd][arm] Prototype i8x16.popcnt
>
> Bug: v8:11002
> Change-Id: Ib97e51ed52249a1af7a4b879396b70a016991719
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2567534
> Commit-Queue: Zhi An Ng <zhin@chromium.org>
> Reviewed-by: Bill Budge <bbudge@chromium.org>
> Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#71552}
Bug: v8:11002
Change-Id: I714918e2640d27d29953f940e3bedeb9ea2f8a08
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2567243
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#71585}
This commit is contained in:
parent
a0f105c8bb
commit
d0f7c6a8fa
@ -4799,7 +4799,7 @@ void Assembler::vext(QwNeonRegister dst, QwNeonRegister src1,
|
|||||||
n * B7 | B6 | m * B5 | vm);
|
n * B7 | B6 | m * B5 | vm);
|
||||||
}
|
}
|
||||||
|
|
||||||
enum NeonSizedOp { VZIP, VUZP, VREV16, VREV32, VREV64, VTRN };
|
enum NeonSizedOp { VZIP, VUZP, VREV16, VREV32, VREV64, VTRN, VCNT };
|
||||||
|
|
||||||
static Instr EncodeNeonSizedOp(NeonSizedOp op, NeonRegType reg_type,
|
static Instr EncodeNeonSizedOp(NeonSizedOp op, NeonRegType reg_type,
|
||||||
NeonSize size, int dst_code, int src_code) {
|
NeonSize size, int dst_code, int src_code) {
|
||||||
@ -4823,6 +4823,9 @@ static Instr EncodeNeonSizedOp(NeonSizedOp op, NeonRegType reg_type,
|
|||||||
case VTRN:
|
case VTRN:
|
||||||
op_encoding = 0x2 * B16 | B7;
|
op_encoding = 0x2 * B16 | B7;
|
||||||
break;
|
break;
|
||||||
|
case VCNT:
|
||||||
|
op_encoding = 0xA * B7;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
@ -4907,6 +4910,13 @@ void Assembler::vtrn(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) {
|
|||||||
emit(EncodeNeonSizedOp(VTRN, NEON_Q, size, src1.code(), src2.code()));
|
emit(EncodeNeonSizedOp(VTRN, NEON_Q, size, src1.code(), src2.code()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Assembler::vcnt(QwNeonRegister dst, QwNeonRegister src) {
|
||||||
|
// Qd = vcnt(Qm) SIMD Vector Count Set Bits.
|
||||||
|
// Instruction details available at ARM DDI 0487F.b, F6-5094.
|
||||||
|
DCHECK(IsEnabled(NEON));
|
||||||
|
emit(EncodeNeonSizedOp(VCNT, NEON_Q, Neon8, dst.code(), src.code()));
|
||||||
|
}
|
||||||
|
|
||||||
// Encode NEON vtbl / vtbx instruction.
|
// Encode NEON vtbl / vtbx instruction.
|
||||||
static Instr EncodeNeonVTB(DwVfpRegister dst, const NeonListOperand& list,
|
static Instr EncodeNeonVTB(DwVfpRegister dst, const NeonListOperand& list,
|
||||||
DwVfpRegister index, bool vtbx) {
|
DwVfpRegister index, bool vtbx) {
|
||||||
|
@ -966,6 +966,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
|
|||||||
void vtbx(DwVfpRegister dst, const NeonListOperand& list,
|
void vtbx(DwVfpRegister dst, const NeonListOperand& list,
|
||||||
DwVfpRegister index);
|
DwVfpRegister index);
|
||||||
|
|
||||||
|
void vcnt(QwNeonRegister dst, QwNeonRegister src);
|
||||||
|
|
||||||
// Pseudo instructions
|
// Pseudo instructions
|
||||||
|
|
||||||
// Different nop operations are used by the code generator to detect certain
|
// Different nop operations are used by the code generator to detect certain
|
||||||
|
@ -1638,6 +1638,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
|||||||
i.InputDoubleRegister(0));
|
i.InputDoubleRegister(0));
|
||||||
DCHECK_EQ(LeaveCC, i.OutputSBit());
|
DCHECK_EQ(LeaveCC, i.OutputSBit());
|
||||||
break;
|
break;
|
||||||
|
case kArmVcnt: {
|
||||||
|
__ vcnt(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||||
|
break;
|
||||||
|
}
|
||||||
case kArmLdrb:
|
case kArmLdrb:
|
||||||
__ ldrb(i.OutputRegister(), i.InputOffset());
|
__ ldrb(i.OutputRegister(), i.InputOffset());
|
||||||
DCHECK_EQ(LeaveCC, i.OutputSBit());
|
DCHECK_EQ(LeaveCC, i.OutputSBit());
|
||||||
|
@ -110,6 +110,7 @@ namespace compiler {
|
|||||||
V(ArmVst1F64) \
|
V(ArmVst1F64) \
|
||||||
V(ArmVld1S128) \
|
V(ArmVld1S128) \
|
||||||
V(ArmVst1S128) \
|
V(ArmVst1S128) \
|
||||||
|
V(ArmVcnt) \
|
||||||
V(ArmFloat32Max) \
|
V(ArmFloat32Max) \
|
||||||
V(ArmFloat64Max) \
|
V(ArmFloat64Max) \
|
||||||
V(ArmFloat32Min) \
|
V(ArmFloat32Min) \
|
||||||
|
@ -103,6 +103,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
|||||||
case kArmVmovHighF64U32:
|
case kArmVmovHighF64U32:
|
||||||
case kArmVmovF64U32U32:
|
case kArmVmovF64U32U32:
|
||||||
case kArmVmovU32U32F64:
|
case kArmVmovU32U32F64:
|
||||||
|
case kArmVcnt:
|
||||||
case kArmFloat32Max:
|
case kArmFloat32Max:
|
||||||
case kArmFloat64Max:
|
case kArmFloat64Max:
|
||||||
case kArmFloat32Min:
|
case kArmFloat32Min:
|
||||||
|
@ -2540,6 +2540,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
|
|||||||
V(I16x8Abs, kArmI16x8Abs) \
|
V(I16x8Abs, kArmI16x8Abs) \
|
||||||
V(I8x16Neg, kArmI8x16Neg) \
|
V(I8x16Neg, kArmI8x16Neg) \
|
||||||
V(I8x16Abs, kArmI8x16Abs) \
|
V(I8x16Abs, kArmI8x16Abs) \
|
||||||
|
V(I8x16Popcnt, kArmVcnt) \
|
||||||
V(S128Not, kArmS128Not) \
|
V(S128Not, kArmS128Not) \
|
||||||
V(V32x4AnyTrue, kArmV32x4AnyTrue) \
|
V(V32x4AnyTrue, kArmV32x4AnyTrue) \
|
||||||
V(V32x4AllTrue, kArmV32x4AllTrue) \
|
V(V32x4AllTrue, kArmV32x4AllTrue) \
|
||||||
|
@ -2737,6 +2737,11 @@ void InstructionSelector::VisitI64x2Eq(Node* node) { UNIMPLEMENTED(); }
|
|||||||
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X && !V8_TARGET_ARCH_ARM64
|
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X && !V8_TARGET_ARCH_ARM64
|
||||||
// && !V8_TARGET_ARCH_IA32
|
// && !V8_TARGET_ARCH_IA32
|
||||||
|
|
||||||
|
#if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM
|
||||||
|
// TODO(v8:11002) Prototype i8x16.popcnt.
|
||||||
|
void InstructionSelector::VisitI8x16Popcnt(Node* node) { UNIMPLEMENTED(); }
|
||||||
|
#endif // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM
|
||||||
|
|
||||||
#if !V8_TARGET_ARCH_ARM64
|
#if !V8_TARGET_ARCH_ARM64
|
||||||
// TODO(v8:10971) Prototype i16x8.q15mulr_sat_s
|
// TODO(v8:10971) Prototype i16x8.q15mulr_sat_s
|
||||||
void InstructionSelector::VisitI16x8Q15MulRSatS(Node* node) { UNIMPLEMENTED(); }
|
void InstructionSelector::VisitI16x8Q15MulRSatS(Node* node) { UNIMPLEMENTED(); }
|
||||||
@ -2758,9 +2763,6 @@ void InstructionSelector::VisitI64x2UConvertI32x4High(Node* node) {
|
|||||||
UNIMPLEMENTED();
|
UNIMPLEMENTED();
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(v8:11002) Prototype i8x16.popcnt.
|
|
||||||
void InstructionSelector::VisitI8x16Popcnt(Node* node) { UNIMPLEMENTED(); }
|
|
||||||
|
|
||||||
// TODO(v8:11008) Prototype extended multiplication.
|
// TODO(v8:11008) Prototype extended multiplication.
|
||||||
void InstructionSelector::VisitI64x2ExtMulLowI32x4S(Node* node) {
|
void InstructionSelector::VisitI64x2ExtMulLowI32x4S(Node* node) {
|
||||||
UNIMPLEMENTED();
|
UNIMPLEMENTED();
|
||||||
|
@ -2268,6 +2268,9 @@ void Decoder::DecodeAdvancedSIMDTwoOrThreeRegisters(Instruction* instr) {
|
|||||||
"vrev%d.%d q%d, q%d", op, esize, Vd, Vm);
|
"vrev%d.%d q%d, q%d", op, esize, Vd, Vm);
|
||||||
} else if (size == 0 && opc1 == 0b10 && opc2 == 0) {
|
} else if (size == 0 && opc1 == 0b10 && opc2 == 0) {
|
||||||
Format(instr, q ? "vswp 'Qd, 'Qm" : "vswp 'Dd, 'Dm");
|
Format(instr, q ? "vswp 'Qd, 'Qm" : "vswp 'Dd, 'Dm");
|
||||||
|
} else if (opc1 == 0 && opc2 == 0b1010) {
|
||||||
|
DCHECK_EQ(0, size);
|
||||||
|
Format(instr, q ? "vcnt.8 'Qd, 'Qm" : "vcnt.8 'Dd, 'Dm");
|
||||||
} else if (opc1 == 0 && opc2 == 0b1011) {
|
} else if (opc1 == 0 && opc2 == 0b1011) {
|
||||||
Format(instr, "vmvn 'Qd, 'Qm");
|
Format(instr, "vmvn 'Qd, 'Qm");
|
||||||
} else if (opc1 == 0b01 && (opc2 & 0b0111) == 0b110) {
|
} else if (opc1 == 0b01 && (opc2 & 0b0111) == 0b110) {
|
||||||
|
@ -4461,6 +4461,17 @@ void Simulator::DecodeAdvancedSIMDTwoOrThreeRegisters(Instruction* instr) {
|
|||||||
set_neon_register(vm, dval);
|
set_neon_register(vm, dval);
|
||||||
set_neon_register(vd, mval);
|
set_neon_register(vd, mval);
|
||||||
}
|
}
|
||||||
|
} else if (opc1 == 0 && opc2 == 0b1010) {
|
||||||
|
// vcnt Qd, Qm.
|
||||||
|
DCHECK_EQ(0, size);
|
||||||
|
int vd = instr->VFPDRegValue(q ? kSimd128Precision : kDoublePrecision);
|
||||||
|
int vm = instr->VFPMRegValue(q ? kSimd128Precision : kDoublePrecision);
|
||||||
|
uint8_t q_data[16];
|
||||||
|
get_neon_register(vm, q_data);
|
||||||
|
for (int i = 0; i < 16; i++) {
|
||||||
|
q_data[i] = base::bits::CountPopulation(q_data[i]);
|
||||||
|
}
|
||||||
|
set_neon_register(vd, q_data);
|
||||||
} else if (opc1 == 0 && opc2 == 0b1011) {
|
} else if (opc1 == 0 && opc2 == 0b1011) {
|
||||||
// vmvn Qd, Qm.
|
// vmvn Qd, Qm.
|
||||||
int vd = instr->VFPDRegValue(kSimd128Precision);
|
int vd = instr->VFPDRegValue(kSimd128Precision);
|
||||||
|
@ -1323,6 +1323,8 @@ TEST(Neon) {
|
|||||||
"f3b1fa45 vtbx.8 d15, {d1, d2, d3}, d5");
|
"f3b1fa45 vtbx.8 d15, {d1, d2, d3}, d5");
|
||||||
COMPARE(vtbx(d15, NeonListOperand(d1, 4), d5),
|
COMPARE(vtbx(d15, NeonListOperand(d1, 4), d5),
|
||||||
"f3b1fb45 vtbx.8 d15, {d1, d2, d3, d4}, d5");
|
"f3b1fb45 vtbx.8 d15, {d1, d2, d3, d4}, d5");
|
||||||
|
COMPARE(vcnt(q1, q2),
|
||||||
|
"f3b02544 vcnt.8 q1, q2");
|
||||||
}
|
}
|
||||||
|
|
||||||
VERIFY_RUN();
|
VERIFY_RUN();
|
||||||
|
@ -2545,7 +2545,7 @@ WASM_SIMD_TEST(I8x16Abs) {
|
|||||||
RunI8x16UnOpTest(execution_tier, lower_simd, kExprI8x16Abs, Abs);
|
RunI8x16UnOpTest(execution_tier, lower_simd, kExprI8x16Abs, Abs);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if V8_TARGET_ARCH_ARM64
|
#if V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM
|
||||||
// TODO(v8:11002) Prototype i8x16.popcnt.
|
// TODO(v8:11002) Prototype i8x16.popcnt.
|
||||||
WASM_SIMD_TEST_NO_LOWERING(I8x16Popcnt) {
|
WASM_SIMD_TEST_NO_LOWERING(I8x16Popcnt) {
|
||||||
FLAG_SCOPE(wasm_simd_post_mvp);
|
FLAG_SCOPE(wasm_simd_post_mvp);
|
||||||
@ -2568,7 +2568,7 @@ WASM_SIMD_TEST_NO_LOWERING(I8x16Popcnt) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif // V8_TARGET_ARCH_ARM64
|
#endif // V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM
|
||||||
|
|
||||||
// Tests both signed and unsigned conversion from I16x8 (packing).
|
// Tests both signed and unsigned conversion from I16x8 (packing).
|
||||||
WASM_SIMD_TEST(I8x16ConvertI16x8) {
|
WASM_SIMD_TEST(I8x16ConvertI16x8) {
|
||||||
|
Loading…
Reference in New Issue
Block a user