Reland "[turbofan][arm64] Emit Lsl for Int32MulWithOverflow when possible"
This is a reland of commit aa541f1c9c
Original change's description:
> [turbofan][arm64] Emit Lsl for Int32MulWithOverflow when possible
>
> Int32MulWithOverflow on arm64 uses a cmp to set flags rather than
> the multiply instruction itself, thus we can use a left shift when
> the multiplication is by a power of two.
>
> This provides 0.15% for Speedometer2 on a Neoverse-N1 machine,
> with React being improved by 0.45%.
>
> Change-Id: Ic8db42ecc7cb14cf1ac7bbbeab0e9d8359104351
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3829472
> Commit-Queue: George Wort <george.wort@arm.com>
> Reviewed-by: Nico Hartmann <nicohartmann@chromium.org>
> Cr-Commit-Position: refs/heads/main@{#82499}
Change-Id: Ib8f387bd41d283df551299f7ee98e72d39e2a3bd
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3865484
Commit-Queue: George Wort <george.wort@arm.com>
Reviewed-by: Nico Hartmann <nicohartmann@chromium.org>
Cr-Commit-Position: refs/heads/main@{#82909}
This commit is contained in:
parent
1c84fedbf5
commit
0d591e919d
@ -903,13 +903,6 @@ void TurboAssembler::Ror(const Register& rd, const Register& rn,
|
||||
rorv(rd, rn, rm);
|
||||
}
|
||||
|
||||
void MacroAssembler::Sbfiz(const Register& rd, const Register& rn, unsigned lsb,
|
||||
unsigned width) {
|
||||
DCHECK(allow_macro_instructions());
|
||||
DCHECK(!rd.IsZero());
|
||||
sbfiz(rd, rn, lsb, width);
|
||||
}
|
||||
|
||||
void TurboAssembler::Sbfx(const Register& rd, const Register& rn, unsigned lsb,
|
||||
unsigned width) {
|
||||
DCHECK(allow_macro_instructions());
|
||||
@ -990,6 +983,13 @@ void TurboAssembler::Ubfiz(const Register& rd, const Register& rn, unsigned lsb,
|
||||
ubfiz(rd, rn, lsb, width);
|
||||
}
|
||||
|
||||
void TurboAssembler::Sbfiz(const Register& rd, const Register& rn, unsigned lsb,
|
||||
unsigned width) {
|
||||
DCHECK(allow_macro_instructions());
|
||||
DCHECK(!rd.IsZero());
|
||||
sbfiz(rd, rn, lsb, width);
|
||||
}
|
||||
|
||||
void TurboAssembler::Ubfx(const Register& rd, const Register& rn, unsigned lsb,
|
||||
unsigned width) {
|
||||
DCHECK(allow_macro_instructions());
|
||||
|
@ -1063,6 +1063,8 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
inline void Sxtw(const Register& rd, const Register& rn);
|
||||
inline void Ubfiz(const Register& rd, const Register& rn, unsigned lsb,
|
||||
unsigned width);
|
||||
inline void Sbfiz(const Register& rd, const Register& rn, unsigned lsb,
|
||||
unsigned width);
|
||||
inline void Ubfx(const Register& rd, const Register& rn, unsigned lsb,
|
||||
unsigned width);
|
||||
inline void Lsr(const Register& rd, const Register& rn, unsigned shift);
|
||||
@ -1624,8 +1626,6 @@ class V8_EXPORT_PRIVATE MacroAssembler : public TurboAssembler {
|
||||
mvni(vd, imm8, shift, shift_amount);
|
||||
}
|
||||
inline void Rev(const Register& rd, const Register& rn);
|
||||
inline void Sbfiz(const Register& rd, const Register& rn, unsigned lsb,
|
||||
unsigned width);
|
||||
inline void Smaddl(const Register& rd, const Register& rn, const Register& rm,
|
||||
const Register& ra);
|
||||
inline void Smsubl(const Register& rd, const Register& rn, const Register& rm,
|
||||
|
@ -1474,6 +1474,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ Ubfiz(i.OutputRegister32(), i.InputRegister32(0), i.InputInt5(1),
|
||||
i.InputInt5(2));
|
||||
break;
|
||||
case kArm64Sbfiz:
|
||||
__ Sbfiz(i.OutputRegister(), i.InputRegister(0), i.InputInt6(1),
|
||||
i.InputInt6(2));
|
||||
break;
|
||||
case kArm64Bfi:
|
||||
__ Bfi(i.OutputRegister(), i.InputRegister(1), i.InputInt6(2),
|
||||
i.InputInt6(3));
|
||||
|
@ -123,6 +123,7 @@ namespace compiler {
|
||||
V(Arm64Ubfx) \
|
||||
V(Arm64Ubfx32) \
|
||||
V(Arm64Ubfiz32) \
|
||||
V(Arm64Sbfiz) \
|
||||
V(Arm64Bfi) \
|
||||
V(Arm64Rbit) \
|
||||
V(Arm64Rbit32) \
|
||||
|
@ -89,6 +89,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kArm64Ubfx:
|
||||
case kArm64Ubfx32:
|
||||
case kArm64Ubfiz32:
|
||||
case kArm64Sbfiz:
|
||||
case kArm64Bfi:
|
||||
case kArm64Rbit:
|
||||
case kArm64Rbit32:
|
||||
@ -412,6 +413,7 @@ int InstructionScheduler::GetInstructionLatency(const Instruction* instr) {
|
||||
case kArm64Sxth32:
|
||||
case kArm64Sxtw:
|
||||
case kArm64Ubfiz32:
|
||||
case kArm64Sbfiz:
|
||||
case kArm64Ubfx:
|
||||
case kArm64Ubfx32:
|
||||
return 1;
|
||||
|
@ -1669,8 +1669,17 @@ void EmitInt32MulWithOverflow(InstructionSelector* selector, Node* node,
|
||||
Int32BinopMatcher m(node);
|
||||
InstructionOperand result = g.DefineAsRegister(node);
|
||||
InstructionOperand left = g.UseRegister(m.left().node());
|
||||
InstructionOperand right = g.UseRegister(m.right().node());
|
||||
selector->Emit(kArm64Smull, result, left, right);
|
||||
|
||||
if (m.right().HasResolvedValue() &&
|
||||
base::bits::IsPowerOfTwo(m.right().ResolvedValue())) {
|
||||
// Sign extend the bottom 32 bits and shift left.
|
||||
int32_t shift = base::bits::WhichPowerOfTwo(m.right().ResolvedValue());
|
||||
selector->Emit(kArm64Sbfiz, result, left, g.TempImmediate(shift),
|
||||
g.TempImmediate(32));
|
||||
} else {
|
||||
InstructionOperand right = g.UseRegister(m.right().node());
|
||||
selector->Emit(kArm64Smull, result, left, right);
|
||||
}
|
||||
|
||||
InstructionCode opcode =
|
||||
kArm64Cmp | AddressingModeField::encode(kMode_Operand2_R_SXTW);
|
||||
|
@ -201,19 +201,19 @@ class ValueHelper {
|
||||
}
|
||||
|
||||
static constexpr uint32_t uint32_array[] = {
|
||||
// 0x00000000, 0x00000001, 0xFFFFFFFF, 0x1B09788B, 0x04C5FCE8, 0xCC0DE5BF,
|
||||
// // This row is useful for testing lea optimizations on intel.
|
||||
// 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000008, 0x00000009,
|
||||
// 0x273A798E, 0x187937A3, 0xECE3AF83, 0x5495A16B, 0x0B668ECC, 0x11223344,
|
||||
// 0x0000009E, 0x00000043, 0x0000AF73, 0x0000116B, 0x00658ECC, 0x002B3B4C,
|
||||
// 0x88776655, 0x70000000, 0x07200000, 0x7FFFFFFF, 0x56123761, 0x7FFFFF00,
|
||||
// 0x761C4761, 0x80000000, 0x88888888, 0xA0000000, 0xDDDDDDDD, 0xE0000000,
|
||||
// 0xEEEEEEEE, 0xFFFFFFFD, 0xF0000000, 0x007FFFFF, 0x003FFFFF, 0x001FFFFF,
|
||||
// 0x000FFFFF, 0x0007FFFF, 0x0003FFFF, 0x0001FFFF, 0x0000FFFF, 0x00007FFF,
|
||||
// 0x00003FFF, 0x00001FFF, 0x00000FFF, 0x000007FF, 0x000003FF, 0x000001FF,
|
||||
0x00000000, 0x00000001, 0xFFFFFFFF, 0x1B09788B, 0x04C5FCE8, 0xCC0DE5BF,
|
||||
// This row is useful for testing lea optimizations on intel.
|
||||
0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000008, 0x00000009,
|
||||
0x273A798E, 0x187937A3, 0xECE3AF83, 0x5495A16B, 0x0B668ECC, 0x11223344,
|
||||
0x0000009E, 0x00000043, 0x0000AF73, 0x0000116B, 0x00658ECC, 0x002B3B4C,
|
||||
0x88776655, 0x70000000, 0x07200000, 0x7FFFFFFF, 0x56123761, 0x7FFFFF00,
|
||||
0x761C4761, 0x80000000, 0x88888888, 0xA0000000, 0xDDDDDDDD, 0xE0000000,
|
||||
0xEEEEEEEE, 0xFFFFFFFD, 0xF0000000, 0x007FFFFF, 0x003FFFFF, 0x001FFFFF,
|
||||
0x000FFFFF, 0x0007FFFF, 0x0003FFFF, 0x0001FFFF, 0x0000FFFF, 0x00007FFF,
|
||||
0x00003FFF, 0x00001FFF, 0x00000FFF, 0x000007FF, 0x000003FF, 0x000001FF,
|
||||
// Bit pattern of a quiet NaN and signaling NaN, with or without
|
||||
// additional payload.
|
||||
0x7F876543};
|
||||
0x7FC00000, 0x7F800000, 0x7FFFFFFF, 0x7F876543};
|
||||
|
||||
static constexpr base::Vector<const uint32_t> uint32_vector() {
|
||||
return base::ArrayVector(uint32_array);
|
||||
|
@ -1930,6 +1930,24 @@ TEST_F(InstructionSelectorTest, OvfBranchWithImmediateOnLeft) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(InstructionSelectorTest, OvfValMulImmediateOnRight) {
|
||||
TRACED_FORRANGE(int32_t, shift, 0, 30) {
|
||||
StreamBuilder m(this, MachineType::Int32(), MachineType::Int32());
|
||||
m.Return(m.Projection(0, m.Int32MulWithOverflow(m.Int32Constant(1 << shift),
|
||||
m.Parameter(0))));
|
||||
Stream s = m.Build();
|
||||
|
||||
ASSERT_EQ(2U, s.size());
|
||||
EXPECT_EQ(kArm64Sbfiz, s[0]->arch_opcode());
|
||||
EXPECT_EQ(kArm64Cmp, s[1]->arch_opcode());
|
||||
ASSERT_EQ(3U, s[0]->InputCount());
|
||||
EXPECT_EQ(shift, s.ToInt32(s[0]->InputAt(1)));
|
||||
EXPECT_LE(1U, s[0]->OutputCount());
|
||||
EXPECT_EQ(32, s.ToInt32(s[0]->InputAt(2)));
|
||||
EXPECT_EQ(kFlags_none, s[0]->flags_mode());
|
||||
}
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Shift instructions.
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user