[Liftoff] Implement i64 popcnt
This is the last remaining instruction from the MVP that was still missing. This CL adds support for ia32, x64, arm, and arm64. For CPUs that do not support the POPCNT instruction, a fallback implementation in C is used.

R=jkummerow@chromium.org

Bug: v8:9919
Change-Id: Ie7a79a46e91726e15379b9a21b59775bbf5de556
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1895569
Commit-Queue: Clemens Backes <clemensb@chromium.org>
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Cr-Commit-Position: refs/heads/master@{#64764}
This commit is contained in:
parent
6c0825aaa7
commit
d710756a7f
@ -762,30 +762,36 @@ void LiftoffAssembler::emit_i32_ctz(Register dst, Register src) {
|
||||
clz(dst, dst);
|
||||
}
|
||||
|
||||
// NOTE(review): this span looks like the pre-change opening of the arm
// emit_i32_popcnt body as kept by the diff view; the same instruction
// sequence appears again in liftoff::GeneratePopCnt further down — confirm.
// It performs the first two steps of a 32-bit population count of |src| into
// |dst|, using two temporaries obtained from GetUnusedRegister.
bool LiftoffAssembler::emit_i32_popcnt(Register dst, Register src) {
|
||||
{
|
||||
// |temps| is not referenced again within this span.
UseScratchRegisterScope temps(this);
|
||||
// Only |dst| is pinned, so the two temporaries are guaranteed to differ from
// |dst| and from each other.
// NOTE(review): nothing visible here excludes |src| from being picked as
// |scratch|, in which case the and_ below would overwrite |src| before the
// sub reads it — confirm against GetUnusedRegister's contract.
LiftoffRegList pinned = LiftoffRegList::ForRegs(dst);
|
||||
Register scratch = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp();
|
||||
Register scratch_2 = GetUnusedRegister(kGpReg, pinned).gp();
|
||||
// Step 1: each 2-bit field of |dst| receives the number of set bits in the
// corresponding 2-bit field of |src|.
// x = x - ((x & (0x55555555 << 1)) >> 1)
|
||||
and_(scratch, src, Operand(0xaaaaaaaa));
|
||||
sub(dst, src, Operand(scratch, LSR, 1));
|
||||
// Step 2: add neighbouring 2-bit counts into 4-bit fields. The shifted
// operand (scratch LSL 2) yields the 0xcccccccc mask without loading a second
// constant.
// x = (x & 0x33333333) + ((x & (0x33333333 << 2)) >> 2)
|
||||
mov(scratch, Operand(0x33333333));
|
||||
and_(scratch_2, dst, Operand(scratch, LSL, 2));
|
||||
and_(scratch, dst, scratch);
|
||||
add(dst, scratch, Operand(scratch_2, LSR, 2));
|
||||
}
|
||||
namespace liftoff {
|
||||
// Emits, through |assm|, a 32-bit population count of |src| into |dst|.
// The count is built by summing adjacent bit fields of doubling width
// (2, 4, 8, 16, 32 bits). |scratch1| and |scratch2| are temporaries and are
// overwritten. |dst| must not alias either temporary (DCHECKed below); |src|
// may alias one of them, which the swap below accounts for.
inline void GeneratePopCnt(Assembler* assm, Register dst, Register src,
|
||||
Register scratch1, Register scratch2) {
|
||||
DCHECK(!AreAliased(dst, scratch1, scratch2));
|
||||
// The first and_ writes scratch1 while |src| is still read by the following
// sub. If |src| is scratch1, swap the roles so that |src| becomes scratch2,
// which is first written only after the last read of |src|.
if (src == scratch1) std::swap(scratch1, scratch2);
|
||||
// Step 1: per 2-bit field, count the set bits.
// x = x - ((x & (0x55555555 << 1)) >> 1)
|
||||
assm->and_(scratch1, src, Operand(0xaaaaaaaa));
|
||||
assm->sub(dst, src, Operand(scratch1, LSR, 1));
|
||||
// Step 2: add neighbouring 2-bit counts into 4-bit fields. The shifted
// operand (scratch1 LSL 2) yields the 0xcccccccc mask without loading a
// second constant.
// x = (x & 0x33333333) + ((x & (0x33333333 << 2)) >> 2)
|
||||
assm->mov(scratch1, Operand(0x33333333));
|
||||
assm->and_(scratch2, dst, Operand(scratch1, LSL, 2));
|
||||
assm->and_(scratch1, dst, scratch1);
|
||||
assm->add(dst, scratch1, Operand(scratch2, LSR, 2));
|
||||
// Step 3: add neighbouring 4-bit counts into per-byte counts.
// x = (x + (x >> 4)) & 0x0F0F0F0F
|
||||
// NOTE(review): the next two calls without the assm-> prefix duplicate the
// assm-> calls that follow them; they look like the pre-change lines kept by
// the diff view — confirm. The same applies to each un-prefixed add/and_
// further down in this function.
add(dst, dst, Operand(dst, LSR, 4));
|
||||
and_(dst, dst, Operand(0x0f0f0f0f));
|
||||
assm->add(dst, dst, Operand(dst, LSR, 4));
|
||||
assm->and_(dst, dst, Operand(0x0f0f0f0f));
|
||||
// Step 4: fold the four byte counts together; no masking is done until the
// final step.
// x = x + (x >> 8)
|
||||
add(dst, dst, Operand(dst, LSR, 8));
|
||||
assm->add(dst, dst, Operand(dst, LSR, 8));
|
||||
// x = x + (x >> 16)
|
||||
add(dst, dst, Operand(dst, LSR, 16));
|
||||
assm->add(dst, dst, Operand(dst, LSR, 16));
|
||||
// Step 5: keep only the low six bits, which hold the total (0..32).
// x = x & 0x3F
|
||||
and_(dst, dst, Operand(0x3f));
|
||||
assm->and_(dst, dst, Operand(0x3f));
|
||||
}
|
||||
} // namespace liftoff
|
||||
|
||||
// Emits a 32-bit population count of |src| into |dst| by delegating to
// liftoff::GeneratePopCnt with two temporaries. Always returns true.
bool LiftoffAssembler::emit_i32_popcnt(Register dst, Register src) {
|
||||
// Pin |dst| so neither temporary aliases it, and pin the first temporary so
// the second one is a different register (GeneratePopCnt DCHECKs this).
LiftoffRegList pinned = LiftoffRegList::ForRegs(dst);
|
||||
Register scratch1 = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp();
|
||||
Register scratch2 = GetUnusedRegister(kGpReg, pinned).gp();
|
||||
liftoff::GeneratePopCnt(this, dst, src, scratch1, scratch2);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1001,6 +1007,23 @@ void LiftoffAssembler::emit_i64_ctz(LiftoffRegister dst, LiftoffRegister src) {
|
||||
mov(dst.high_gp(), Operand(0)); // High word of result is always 0.
|
||||
}
|
||||
|
||||
// Emits a 64-bit population count for a value held in a register pair: each
// 32-bit half of |src| is counted separately with liftoff::GeneratePopCnt and
// the two counts are added. The sum ends up in dst.low_gp() and dst.high_gp()
// is set to zero. Always returns true.
bool LiftoffAssembler::emit_i64_popcnt(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
// Produce partial popcnts in the two dst registers, making sure not to
|
||||
// overwrite the second src register before using it.
|
||||
// If src.high_gp() aliases dst.low_gp(), count the high half first: the first
// GeneratePopCnt call writes dst.low_gp(), so that half must be consumed by
// that same call rather than by the second one.
Register src1 = src.high_gp() == dst.low_gp() ? src.high_gp() : src.low_gp();
|
||||
Register src2 = src.high_gp() == dst.low_gp() ? src.low_gp() : src.high_gp();
|
||||
// Pin both halves of |dst| and |src2| so the temporaries cannot alias them;
// |src2| has to stay intact across the first GeneratePopCnt call.
LiftoffRegList pinned = LiftoffRegList::ForRegs(dst, src2);
|
||||
Register scratch1 = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp();
|
||||
Register scratch2 = GetUnusedRegister(kGpReg, pinned).gp();
|
||||
liftoff::GeneratePopCnt(this, dst.low_gp(), src1, scratch1, scratch2);
|
||||
liftoff::GeneratePopCnt(this, dst.high_gp(), src2, scratch1, scratch2);
|
||||
// Now add the two into the lower dst reg and clear the higher dst reg.
|
||||
add(dst.low_gp(), dst.low_gp(), dst.high_gp());
|
||||
mov(dst.high_gp(), Operand(0));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool LiftoffAssembler::emit_f32_ceil(DoubleRegister dst, DoubleRegister src) {
|
||||
if (CpuFeatures::IsSupported(ARMv8)) {
|
||||
CpuFeatureScope scope(this, ARMv8);
|
||||
|
@ -588,6 +588,17 @@ void LiftoffAssembler::emit_i64_ctz(LiftoffRegister dst, LiftoffRegister src) {
|
||||
Clz(dst.gp().X(), dst.gp().X());
|
||||
}
|
||||
|
||||
// Emits a 64-bit population count of |src| into |dst| by routing the value
// through a scratch vector register. Always returns true.
bool LiftoffAssembler::emit_i64_popcnt(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
UseScratchRegisterScope temps(this);
|
||||
// Acquire the temporary in the 8B format (eight byte lanes).
VRegister scratch = temps.AcquireV(kFormat8B);
|
||||
// Move the 64-bit input from the general-purpose register into the vector
// register.
Fmov(scratch.D(), src.gp().X());
|
||||
// NOTE(review): Cnt presumably counts the set bits of each byte lane and Addv
// sums the lanes into the lowest byte (NEON CNT / ADDV) — confirm against the
// arm64 macro assembler.
Cnt(scratch, scratch);
|
||||
Addv(scratch.B(), scratch);
|
||||
// Move the result back into the general-purpose destination register.
Fmov(dst.gp().X(), scratch.D());
|
||||
return true;
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_i32_divs(Register dst, Register lhs, Register rhs,
|
||||
Label* trap_div_by_zero,
|
||||
Label* trap_div_unrepresentable) {
|
||||
|
@ -1083,6 +1083,21 @@ void LiftoffAssembler::emit_i64_ctz(LiftoffRegister dst, LiftoffRegister src) {
|
||||
xor_(dst.high_gp(), dst.high_gp()); // High word of result is always 0.
|
||||
}
|
||||
|
||||
// Emits a 64-bit population count for a value held in a register pair, using
// the POPCNT instruction on each 32-bit half. Returns false without emitting
// anything when the CPU does not support POPCNT; otherwise the sum of both
// counts is left in dst.low_gp(), dst.high_gp() is zeroed, and true is
// returned.
bool LiftoffAssembler::emit_i64_popcnt(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
if (!CpuFeatures::IsSupported(POPCNT)) return false;
|
||||
CpuFeatureScope scope(this, POPCNT);
|
||||
// Produce partial popcnts in the two dst registers.
|
||||
// If src.high_gp() aliases dst.low_gp(), count the high half first: the first
// popcnt writes dst.low_gp(), so that half must be read by that same
// instruction rather than by the second one.
Register src1 = src.high_gp() == dst.low_gp() ? src.high_gp() : src.low_gp();
|
||||
Register src2 = src.high_gp() == dst.low_gp() ? src.low_gp() : src.high_gp();
|
||||
popcnt(dst.low_gp(), src1);
|
||||
popcnt(dst.high_gp(), src2);
|
||||
// Add the two into the lower dst reg, clear the higher dst reg.
|
||||
add(dst.low_gp(), dst.high_gp());
|
||||
xor_(dst.high_gp(), dst.high_gp());
|
||||
return true;
|
||||
}
|
||||
|
||||
// Converts the i32 in |src| to a pointer-sized integer in |dst|. The ia32
// implementation emits no code and touches neither register.
void LiftoffAssembler::emit_i32_to_intptr(Register dst, Register src) {
|
||||
// This is a nop on ia32.
// NOTE(review): presumably because a pointer-sized integer is already 32 bits
// wide on ia32 — confirm.
|
||||
}
|
||||
|
@ -462,6 +462,7 @@ class LiftoffAssembler : public TurboAssembler {
|
||||
// i64 unops.
|
||||
inline void emit_i64_clz(LiftoffRegister dst, LiftoffRegister src);
|
||||
inline void emit_i64_ctz(LiftoffRegister dst, LiftoffRegister src);
|
||||
// Returns false when no inline code was emitted (the ia32 and x64
// implementations do so when the CPU lacks POPCNT); the Liftoff compiler then
// falls back to calling a C function.
inline bool emit_i64_popcnt(LiftoffRegister dst, LiftoffRegister src);
|
||||
|
||||
inline void emit_i32_to_intptr(Register dst, Register src);
|
||||
|
||||
|
@ -839,8 +839,20 @@ class LiftoffCompiler {
|
||||
});
|
||||
break;
|
||||
// i64.popcnt: emitted inline when the assembler supports it, otherwise via a
// call to a C helper.
case kExprI64Popcnt:
|
||||
// NOTE(review): this `return unsupported(...)` precedes the EmitUnOp call in
// the same case; it looks like the pre-change statement kept by the diff view
// — confirm.
return unsupported(decoder, kComplexOperation,
|
||||
WasmOpcodes::OpcodeName(opcode));
|
||||
EmitUnOp<kWasmI64, kWasmI64>(
|
||||
[=](LiftoffRegister dst, LiftoffRegister src) {
|
||||
// Fast path: the assembler emitted the popcount inline.
if (__ emit_i64_popcnt(dst, src)) return;
|
||||
// The c function returns i32. We will zero-extend later.
|
||||
// Signature of the C call: one i32 return value, one i64 parameter.
ValueType sig_i_l_reps[] = {kWasmI32, kWasmI64};
|
||||
FunctionSig sig_i_l(1, 1, sig_i_l_reps);
|
||||
// When an i64 occupies a register pair, the i32 result of the call is
// received in the low half of |dst|; otherwise |dst| itself is used.
LiftoffRegister c_call_dst = kNeedI64RegPair ? dst.low() : dst;
|
||||
GenerateCCall(&c_call_dst, &sig_i_l, kWasmStmt, &src,
|
||||
ExternalReference::wasm_word64_popcnt());
|
||||
// Now zero-extend the result to i64.
|
||||
__ emit_type_conversion(kExprI64UConvertI32, dst, c_call_dst,
|
||||
nullptr);
|
||||
});
|
||||
break;
|
||||
case kExprI32SConvertSatF32:
|
||||
case kExprI32UConvertSatF32:
|
||||
case kExprI32SConvertSatF64:
|
||||
|
@ -903,6 +903,14 @@ void LiftoffAssembler::emit_i64_ctz(LiftoffRegister dst, LiftoffRegister src) {
|
||||
Tzcntq(dst.gp(), src.gp());
|
||||
}
|
||||
|
||||
// Emits a 64-bit population count of |src| into |dst| with a single popcntq.
// Returns false without emitting anything when the CPU does not support
// POPCNT, and true otherwise.
bool LiftoffAssembler::emit_i64_popcnt(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
if (!CpuFeatures::IsSupported(POPCNT)) return false;
|
||||
// Declare POPCNT as in use for the rest of this function before emitting the
// instruction that depends on it.
CpuFeatureScope scope(this, POPCNT);
|
||||
popcntq(dst.gp(), src.gp());
|
||||
return true;
|
||||
}
|
||||
|
||||
// Converts the i32 in |src| to a pointer-sized integer in |dst|.
void LiftoffAssembler::emit_i32_to_intptr(Register dst, Register src) {
|
||||
// NOTE(review): movsxlq presumably sign-extends the low 32 bits of |src| to
// 64 bits — confirm against the x64 assembler.
movsxlq(dst, src);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user