[turbofan/x64] Load word64 followed by a shift right by 32 -> load (and sign-extend if necessary) the high 32 bits.

We were missing this optimization in a few cases because TruncateInt64ToInt32 was also interfering.
Also removed the equivalent from simplified-lowering.cc, as the arm64 instruction selector has a similar optimization.

R=jarin@chromium.org

Review-Url: https://codereview.chromium.org/2252333002
Cr-Commit-Position: refs/heads/master@{#38711}
This commit is contained in:
epertoso 2016-08-18 04:24:24 -07:00 committed by Commit bot
parent 982b399423
commit 06cde13e7c
3 changed files with 102 additions and 53 deletions

View File

@ -2189,29 +2189,8 @@ class RepresentationSelector {
FieldAccess access = FieldAccessOf(node->op());
MachineRepresentation const representation =
access.machine_type.representation();
// If we are loading from a Smi field and truncate the result to Word32,
// we can instead just load the high word on 64-bit architectures, which
// is exactly the Word32 we are looking for, and therefore avoid a nasty
// right shift afterwards.
// TODO(bmeurer): Introduce an appropriate tagged-signed machine rep.
if (truncation.IsUsedAsWord32() &&
representation == MachineRepresentation::kTagged &&
access.type->Is(Type::TaggedSigned()) && SmiValuesAre32Bits()) {
VisitUnop(node, UseInfoForBasePointer(access),
MachineRepresentation::kWord32);
if (lower()) {
// Morph this Smi load field into an int32 load field.
access.machine_type = MachineType::Int32();
access.type = type_cache_.kInt32;
#if V8_TARGET_LITTLE_ENDIAN
access.offset += kPointerSize / 2;
#endif
NodeProperties::ChangeOp(node,
jsgraph_->simplified()->LoadField(access));
}
} else {
VisitUnop(node, UseInfoForBasePointer(access), representation);
}
VisitUnop(node, UseInfoForBasePointer(access), representation);
return;
}
case IrOpcode::kStoreField: {

View File

@ -37,6 +37,15 @@ class X64OperandGenerator final : public OperandGenerator {
}
}
// Returns the value carried by a constant node as an int32_t.
//
// The node must satisfy CanBeImmediate() (checked in debug builds). An
// Int32Constant is returned as-is; anything else is expected to be an
// Int64Constant, whose value is narrowed to 32 bits.
int32_t GetImmediateIntegerValue(Node* node) {
  DCHECK(CanBeImmediate(node));
  const bool is_int32_constant = node->opcode() == IrOpcode::kInt32Constant;
  if (!is_int32_constant) {
    // Only the 64-bit constant flavour remains a valid input here.
    DCHECK_EQ(IrOpcode::kInt64Constant, node->opcode());
    return static_cast<int32_t>(OpParameter<int64_t>(node));
  }
  return OpParameter<int32_t>(node);
}
bool CanBeMemoryOperand(InstructionCode opcode, Node* node, Node* input,
int effect_level) {
if (input->opcode() != IrOpcode::kLoad ||
@ -631,33 +640,14 @@ void InstructionSelector::VisitWord32Shr(Node* node) {
VisitWord32Shift(this, node, kX64Shr32);
}
// Selects code for a Word64Shr node by delegating to the shared
// VisitWord64Shift helper with the kX64Shr opcode.
void InstructionSelector::VisitWord64Shr(Node* node) {
VisitWord64Shift(this, node, kX64Shr);
}
// Selects code for a Word32Sar node.
//
// When the left input is a Word32Shl that this node can cover, and both
// shifts use the same constant amount of 16 or 24, the shl/sar pair is
// emitted as a single kX64Movsxwl or kX64Movsxbl respectively. Every other
// shape goes through the generic VisitWord32Shift helper with kX64Sar32.
void InstructionSelector::VisitWord32Sar(Node* node) {
X64OperandGenerator g(this);
Int32BinopMatcher m(node);
// Only fold the shift-left if this node is allowed to cover it.
if (CanCover(m.node(), m.left().node()) && m.left().IsWord32Shl()) {
Int32BinopMatcher mleft(m.left().node());
if (mleft.right().Is(16) && m.right().Is(16)) {
// (x << 16) >> 16: emitted as one kX64Movsxwl on the original operand.
Emit(kX64Movsxwl, g.DefineAsRegister(node), g.Use(mleft.left().node()));
return;
} else if (mleft.right().Is(24) && m.right().Is(24)) {
// (x << 24) >> 24: emitted as one kX64Movsxbl on the original operand.
Emit(kX64Movsxbl, g.DefineAsRegister(node), g.Use(mleft.left().node()));
return;
}
}
// No special pattern matched: plain 32-bit arithmetic shift right.
VisitWord32Shift(this, node, kX64Sar32);
}
void InstructionSelector::VisitWord64Sar(Node* node) {
X64OperandGenerator g(this);
namespace {
bool TryMatchLoadWord64AndShiftRight(InstructionSelector* selector, Node* node,
InstructionCode opcode) {
DCHECK(IrOpcode::kWord64Sar == node->opcode() ||
IrOpcode::kWord64Shr == node->opcode());
X64OperandGenerator g(selector);
Int64BinopMatcher m(node);
if (CanCover(m.node(), m.left().node()) && m.left().IsLoad() &&
if (selector->CanCover(m.node(), m.left().node()) && m.left().IsLoad() &&
m.right().Is(32)) {
// Just load and sign-extend the interesting 4 bytes instead. This happens,
// for example, when we're loading and untagging SMIs.
@ -715,16 +705,43 @@ void InstructionSelector::VisitWord64Sar(Node* node) {
}
inputs[input_count++] = ImmediateOperand(ImmediateOperand::INLINE, 4);
} else {
ImmediateOperand* op = ImmediateOperand::cast(&inputs[input_count - 1]);
int32_t displacement = sequence()->GetImmediate(op).ToInt32();
*op = ImmediateOperand(ImmediateOperand::INLINE, displacement + 4);
int32_t displacement = g.GetImmediateIntegerValue(mleft.displacement());
inputs[input_count - 1] =
ImmediateOperand(ImmediateOperand::INLINE, displacement + 4);
}
InstructionOperand outputs[] = {g.DefineAsRegister(node)};
InstructionCode code = kX64Movsxlq | AddressingModeField::encode(mode);
Emit(code, 1, outputs, input_count, inputs);
InstructionCode code = opcode | AddressingModeField::encode(mode);
selector->Emit(code, 1, outputs, input_count, inputs);
return true;
}
}
return false;
}
} // namespace
// Selects code for a Word64Shr node.
//
// First gives TryMatchLoadWord64AndShiftRight a chance to handle the node
// using kX64Movl; if it reports a match nothing else is emitted here.
// Otherwise the node goes through the generic VisitWord64Shift helper with
// kX64Shr.
void InstructionSelector::VisitWord64Shr(Node* node) {
  const bool folded_into_load =
      TryMatchLoadWord64AndShiftRight(this, node, kX64Movl);
  if (!folded_into_load) {
    VisitWord64Shift(this, node, kX64Shr);
  }
}
// Selects code for a Word32Sar node.
//
// A covered Word32Shl feeding this node with the same constant shift amount
// on both sides is collapsed into one instruction: kX64Movsxwl for a shift
// of 16 and kX64Movsxbl for a shift of 24. Anything else falls back to the
// generic VisitWord32Shift helper with kX64Sar32.
void InstructionSelector::VisitWord32Sar(Node* node) {
  X64OperandGenerator g(this);
  Int32BinopMatcher sar(node);
  const bool covers_shl =
      CanCover(sar.node(), sar.left().node()) && sar.left().IsWord32Shl();
  if (covers_shl) {
    Int32BinopMatcher shl(sar.left().node());
    const bool both_by_16 = shl.right().Is(16) && sar.right().Is(16);
    const bool both_by_24 =
        !both_by_16 && shl.right().Is(24) && sar.right().Is(24);
    if (both_by_16 || both_by_24) {
      // The shl/sar pair becomes a single instruction on the shl's input.
      const InstructionCode opcode = both_by_16 ? kX64Movsxwl : kX64Movsxbl;
      Emit(opcode, g.DefineAsRegister(node), g.Use(shl.left().node()));
      return;
    }
  }
  VisitWord32Shift(this, node, kX64Sar32);
}
// Selects code for a Word64Sar node.
//
// First gives TryMatchLoadWord64AndShiftRight a chance to handle the node
// using kX64Movsxlq; if it reports a match nothing else is emitted here.
// Otherwise the node goes through the generic VisitWord64Shift helper with
// kX64Sar.
void InstructionSelector::VisitWord64Sar(Node* node) {
  const bool folded_into_load =
      TryMatchLoadWord64AndShiftRight(this, node, kX64Movsxlq);
  if (!folded_into_load) {
    VisitWord64Shift(this, node, kX64Sar);
  }
}
@ -1258,6 +1275,10 @@ void InstructionSelector::VisitTruncateInt64ToInt32(Node* node) {
case IrOpcode::kWord64Shr: {
Int64BinopMatcher m(value);
if (m.right().Is(32)) {
if (TryMatchLoadWord64AndShiftRight(this, value, kX64Movl)) {
Emit(kArchNop, g.DefineSameAsFirst(node), g.Use(value));
return;
}
Emit(kX64Shr, g.DefineSameAsFirst(node),
g.UseRegister(m.left().node()), g.TempImmediate(32));
return;

View File

@ -1303,6 +1303,55 @@ TEST_F(InstructionSelectorTest, Word32Clz) {
EXPECT_EQ(s.ToVreg(n), s.ToVreg(s[0]->Output()));
}
// Checks that a 64-bit load followed by a right shift by 32 is selected as a
// single load instruction with displacement 4. Three graph shapes are covered:
// Word64Shr, Word64Sar, and Word64Sar followed by TruncateInt64ToInt32.
TEST_F(InstructionSelectorTest, LoadAndWord64ShiftRight32) {
{
// Case 1: Uint64 load + Word64Shr by 32 -> one kX64Movl.
StreamBuilder m(this, MachineType::Uint64(), MachineType::Uint32());
Node* const p0 = m.Parameter(0);
Node* const load = m.Load(MachineType::Uint64(), p0);
Node* const shift = m.Word64Shr(load, m.Int32Constant(32));
m.Return(shift);
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
EXPECT_EQ(kX64Movl, s[0]->arch_opcode());
// Inputs are the base (p0) and an immediate displacement of 4.
ASSERT_EQ(2U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
EXPECT_EQ(4, s.ToInt32(s[0]->InputAt(1)));
ASSERT_EQ(1U, s[0]->OutputCount());
EXPECT_EQ(s.ToVreg(shift), s.ToVreg(s[0]->Output()));
}
{
// Case 2: Int64 load + Word64Sar by 32 -> one kX64Movsxlq.
StreamBuilder m(this, MachineType::Int64(), MachineType::Int32());
Node* const p0 = m.Parameter(0);
Node* const load = m.Load(MachineType::Int64(), p0);
Node* const shift = m.Word64Sar(load, m.Int32Constant(32));
m.Return(shift);
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
EXPECT_EQ(kX64Movsxlq, s[0]->arch_opcode());
// Inputs are the base (p0) and an immediate displacement of 4.
ASSERT_EQ(2U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
EXPECT_EQ(4, s.ToInt32(s[0]->InputAt(1)));
ASSERT_EQ(1U, s[0]->OutputCount());
EXPECT_EQ(s.ToVreg(shift), s.ToVreg(s[0]->Output()));
}
{
// Case 3: Int64 load + Word64Sar by 32 + TruncateInt64ToInt32 -> one
// kX64Movl.
StreamBuilder m(this, MachineType::Int64(), MachineType::Int32());
Node* const p0 = m.Parameter(0);
Node* const load = m.Load(MachineType::Int64(), p0);
Node* const shift = m.Word64Sar(load, m.Int32Constant(32));
Node* const truncate = m.TruncateInt64ToInt32(shift);
m.Return(truncate);
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
EXPECT_EQ(kX64Movl, s[0]->arch_opcode());
// Inputs are the base (p0) and an immediate displacement of 4.
ASSERT_EQ(2U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
EXPECT_EQ(4, s.ToInt32(s[0]->InputAt(1)));
ASSERT_EQ(1U, s[0]->OutputCount());
// The load's output is expected to be the vreg of the shift node, not of
// the truncate node.
EXPECT_EQ(s.ToVreg(shift), s.ToVreg(s[0]->Output()));
}
}
} // namespace compiler
} // namespace internal
} // namespace v8