[arm] Optimize vcmp when lhs operand is #0.0

This patch checks whether the lhs operand of a floating-point comparison
for ARM is the constant #0.0 and, if so, commutes the operands (and the
condition accordingly). This lets us optimize a comparison with zero, as
the vcmp instruction accepts #0.0 as its rhs operand.

Code before for "0.0 < 0.123":
------------------------------
movw ip, #29360
movt ip, #37224
movw r9, #31981
movt r9, #16319
vmov d0, ip, r9
mov ip, #0
vmov d1, ip, ip
vcmp.f64 d1, d0
vmrs APSR, FPSCR
bcc +12

Code after:
-----------
movw ip, #29360
movt ip, #37224
movw r9, #31981
movt r9, #16319
vmov d0, ip, r9
vcmp.f64 d0, #0.0
vmrs APSR, FPSCR
bgt +12

BUG=

Review URL: https://codereview.chromium.org/1361913003

Cr-Commit-Position: refs/heads/master@{#30911}
This commit is contained in:
pierre.langlois 2015-09-24 05:04:01 -07:00 committed by Commit bot
parent 8087c49dc7
commit d1472d65dd
3 changed files with 106 additions and 67 deletions

View File

@ -220,6 +220,22 @@ Condition FlagsConditionToCondition(FlagsCondition condition) {
return ls;
case kUnsignedGreaterThan:
return hi;
case kFloatLessThanOrUnordered:
return lt;
case kFloatGreaterThanOrEqual:
return ge;
case kFloatLessThanOrEqual:
return ls;
case kFloatGreaterThanOrUnordered:
return hi;
case kFloatLessThan:
return lo;
case kFloatGreaterThanOrEqualOrUnordered:
return hs;
case kFloatLessThanOrEqualOrUnordered:
return le;
case kFloatGreaterThan:
return gt;
case kOverflow:
return vs;
case kNotOverflow:

View File

@ -1267,22 +1267,37 @@ void InstructionSelector::VisitTailCall(Node* node) {
namespace {
// Shared routine for multiple compare operations.
//
// Folds the flags continuation |cont| into |opcode| via cont->Encode() and
// emits one instruction taking |left| and |right| as its first two inputs:
//  - branch continuation: the instruction has no output, and the labels of
//    the continuation's true and false blocks are passed after the operands;
//  - otherwise: the continuation is DCHECKed to be a "set" continuation and
//    its result node is defined as a register output.
void VisitCompare(InstructionSelector* selector, InstructionCode opcode,
InstructionOperand left, InstructionOperand right,
FlagsContinuation* cont) {
ArmOperandGenerator g(selector);
// Merge the continuation's flags information into the opcode.
opcode = cont->Encode(opcode);
if (cont->IsBranch()) {
selector->Emit(opcode, g.NoOutput(), left, right,
g.Label(cont->true_block()), g.Label(cont->false_block()));
} else {
DCHECK(cont->IsSet());
selector->Emit(opcode, g.DefineAsRegister(cont->result()), left, right);
}
}
// Shared routine for multiple float32 compare operations.
void VisitFloat32Compare(InstructionSelector* selector, Node* node,
FlagsContinuation* cont) {
ArmOperandGenerator g(selector);
Float32BinopMatcher m(node);
InstructionOperand rhs = m.right().Is(0.0) ? g.UseImmediate(m.right().node())
: g.UseRegister(m.right().node());
if (cont->IsBranch()) {
selector->Emit(cont->Encode(kArmVcmpF32), g.NoOutput(),
g.UseRegister(m.left().node()), rhs,
g.Label(cont->true_block()), g.Label(cont->false_block()));
if (m.right().Is(0.0f)) {
VisitCompare(selector, kArmVcmpF32, g.UseRegister(m.left().node()),
g.UseImmediate(m.right().node()), cont);
} else if (m.left().Is(0.0f)) {
cont->Commute();
VisitCompare(selector, kArmVcmpF32, g.UseRegister(m.right().node()),
g.UseImmediate(m.left().node()), cont);
} else {
DCHECK(cont->IsSet());
selector->Emit(cont->Encode(kArmVcmpF32),
g.DefineAsRegister(cont->result()),
g.UseRegister(m.left().node()), rhs);
VisitCompare(selector, kArmVcmpF32, g.UseRegister(m.left().node()),
g.UseRegister(m.right().node()), cont);
}
}
@ -1292,17 +1307,16 @@ void VisitFloat64Compare(InstructionSelector* selector, Node* node,
FlagsContinuation* cont) {
ArmOperandGenerator g(selector);
Float64BinopMatcher m(node);
InstructionOperand rhs = m.right().Is(0.0) ? g.UseImmediate(m.right().node())
: g.UseRegister(m.right().node());
if (cont->IsBranch()) {
selector->Emit(cont->Encode(kArmVcmpF64), g.NoOutput(),
g.UseRegister(m.left().node()), rhs,
g.Label(cont->true_block()), g.Label(cont->false_block()));
if (m.right().Is(0.0)) {
VisitCompare(selector, kArmVcmpF64, g.UseRegister(m.left().node()),
g.UseImmediate(m.right().node()), cont);
} else if (m.left().Is(0.0)) {
cont->Commute();
VisitCompare(selector, kArmVcmpF64, g.UseRegister(m.right().node()),
g.UseImmediate(m.left().node()), cont);
} else {
DCHECK(cont->IsSet());
selector->Emit(cont->Encode(kArmVcmpF64),
g.DefineAsRegister(cont->result()),
g.UseRegister(m.left().node()), rhs);
VisitCompare(selector, kArmVcmpF64, g.UseRegister(m.left().node()),
g.UseRegister(m.right().node()), cont);
}
}
@ -1389,19 +1403,19 @@ void VisitWordCompareZero(InstructionSelector* selector, Node* user,
cont->OverwriteAndNegateIfEqual(kEqual);
return VisitFloat32Compare(selector, value, cont);
case IrOpcode::kFloat32LessThan:
cont->OverwriteAndNegateIfEqual(kUnsignedLessThan);
cont->OverwriteAndNegateIfEqual(kFloatLessThan);
return VisitFloat32Compare(selector, value, cont);
case IrOpcode::kFloat32LessThanOrEqual:
cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual);
cont->OverwriteAndNegateIfEqual(kFloatLessThanOrEqual);
return VisitFloat32Compare(selector, value, cont);
case IrOpcode::kFloat64Equal:
cont->OverwriteAndNegateIfEqual(kEqual);
return VisitFloat64Compare(selector, value, cont);
case IrOpcode::kFloat64LessThan:
cont->OverwriteAndNegateIfEqual(kUnsignedLessThan);
cont->OverwriteAndNegateIfEqual(kFloatLessThan);
return VisitFloat64Compare(selector, value, cont);
case IrOpcode::kFloat64LessThanOrEqual:
cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual);
cont->OverwriteAndNegateIfEqual(kFloatLessThanOrEqual);
return VisitFloat64Compare(selector, value, cont);
case IrOpcode::kProjection:
// Check if this is the overflow output projection of an
@ -1565,13 +1579,13 @@ void InstructionSelector::VisitFloat32Equal(Node* node) {
void InstructionSelector::VisitFloat32LessThan(Node* node) {
FlagsContinuation cont(kUnsignedLessThan, node);
FlagsContinuation cont(kFloatLessThan, node);
VisitFloat32Compare(this, node, &cont);
}
void InstructionSelector::VisitFloat32LessThanOrEqual(Node* node) {
FlagsContinuation cont(kUnsignedLessThanOrEqual, node);
FlagsContinuation cont(kFloatLessThanOrEqual, node);
VisitFloat32Compare(this, node, &cont);
}
@ -1583,13 +1597,13 @@ void InstructionSelector::VisitFloat64Equal(Node* node) {
void InstructionSelector::VisitFloat64LessThan(Node* node) {
FlagsContinuation cont(kUnsignedLessThan, node);
FlagsContinuation cont(kFloatLessThan, node);
VisitFloat64Compare(this, node, &cont);
}
void InstructionSelector::VisitFloat64LessThanOrEqual(Node* node) {
FlagsContinuation cont(kUnsignedLessThanOrEqual, node);
FlagsContinuation cont(kFloatLessThanOrEqual, node);
VisitFloat64Compare(this, node, &cont);
}

View File

@ -1392,6 +1392,7 @@ struct Comparison {
const char* constructor_name;
FlagsCondition flags_condition;
FlagsCondition negated_flags_condition;
FlagsCondition commuted_flags_condition;
};
@ -1401,15 +1402,17 @@ std::ostream& operator<<(std::ostream& os, const Comparison& cmp) {
const Comparison kComparisons[] = {
{&RawMachineAssembler::Word32Equal, "Word32Equal", kEqual, kNotEqual},
{&RawMachineAssembler::Word32Equal, "Word32Equal", kEqual, kNotEqual,
kEqual},
{&RawMachineAssembler::Int32LessThan, "Int32LessThan", kSignedLessThan,
kSignedGreaterThanOrEqual},
kSignedGreaterThanOrEqual, kSignedGreaterThan},
{&RawMachineAssembler::Int32LessThanOrEqual, "Int32LessThanOrEqual",
kSignedLessThanOrEqual, kSignedGreaterThan},
kSignedLessThanOrEqual, kSignedGreaterThan, kSignedGreaterThanOrEqual},
{&RawMachineAssembler::Uint32LessThan, "Uint32LessThan", kUnsignedLessThan,
kUnsignedGreaterThanOrEqual},
kUnsignedGreaterThanOrEqual, kUnsignedGreaterThan},
{&RawMachineAssembler::Uint32LessThanOrEqual, "Uint32LessThanOrEqual",
kUnsignedLessThanOrEqual, kUnsignedGreaterThan}};
kUnsignedLessThanOrEqual, kUnsignedGreaterThan,
kUnsignedGreaterThanOrEqual}};
} // namespace
@ -1495,11 +1498,13 @@ INSTANTIATE_TEST_CASE_P(InstructionSelectorTest,
namespace {
const Comparison kF32Comparisons[] = {
{&RawMachineAssembler::Float32Equal, "Float32Equal", kEqual, kNotEqual},
{&RawMachineAssembler::Float32Equal, "Float32Equal", kEqual, kNotEqual,
kEqual},
{&RawMachineAssembler::Float32LessThan, "Float32LessThan",
kUnsignedLessThan, kUnsignedGreaterThanOrEqual},
kFloatLessThan, kFloatGreaterThanOrEqualOrUnordered, kFloatGreaterThan},
{&RawMachineAssembler::Float32LessThanOrEqual, "Float32LessThanOrEqual",
kUnsignedLessThanOrEqual, kUnsignedGreaterThan}};
kFloatLessThanOrEqual, kFloatGreaterThanOrUnordered,
kFloatGreaterThanOrEqual}};
} // namespace
@ -1551,33 +1556,36 @@ TEST_P(InstructionSelectorF32ComparisonTest, WithImmediateZeroOnRight) {
}
// For each parameterized float32 comparison, builds the comparison with the
// constant 0.0f as the left operand and Parameter(0) as the right operand,
// then checks that exactly one kArmVcmpF32 instruction is selected, that its
// second input is an immediate, and that its flags condition equals the
// comparison's commuted_flags_condition.
TEST_P(InstructionSelectorF32ComparisonTest, WithImmediateZeroOnLeft) {
const Comparison& cmp = GetParam();
StreamBuilder m(this, kMachInt32, kMachFloat32);
m.Return((m.*cmp.constructor)(m.Float32Constant(0.0f), m.Parameter(0)));
Stream const s = m.Build();
ASSERT_EQ(1U, s.size());
EXPECT_EQ(kArmVcmpF32, s[0]->arch_opcode());
ASSERT_EQ(2U, s[0]->InputCount());
// The zero constant is expected in the second input slot as an immediate.
EXPECT_TRUE(s[0]->InputAt(1)->IsImmediate());
ASSERT_EQ(1U, s[0]->OutputCount());
EXPECT_EQ(kFlags_set, s[0]->flags_mode());
EXPECT_EQ(cmp.commuted_flags_condition, s[0]->flags_condition());
}
INSTANTIATE_TEST_CASE_P(InstructionSelectorTest,
InstructionSelectorF32ComparisonTest,
::testing::ValuesIn(kF32Comparisons));
// Builds Float32Equal with the constant 0.0f as the left operand and
// Parameter(0) as the right operand, then checks that exactly one
// kArmVcmpF32 instruction is selected with two inputs (the second being an
// immediate), one output, flags mode kFlags_set and flags condition kEqual.
TEST_F(InstructionSelectorTest, Float32EqualWithImmediateZeroOnLeft) {
StreamBuilder m(this, kMachInt32, kMachFloat32);
m.Return(m.Float32Equal(m.Float32Constant(0.0f), m.Parameter(0)));
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
EXPECT_EQ(kArmVcmpF32, s[0]->arch_opcode());
EXPECT_EQ(2U, s[0]->InputCount());
// The zero constant is expected in the second input slot as an immediate.
EXPECT_TRUE(s[0]->InputAt(1)->IsImmediate());
EXPECT_EQ(1U, s[0]->OutputCount());
EXPECT_EQ(kFlags_set, s[0]->flags_mode());
EXPECT_EQ(kEqual, s[0]->flags_condition());
}
namespace {
const Comparison kF64Comparisons[] = {
{&RawMachineAssembler::Float64Equal, "Float64Equal", kEqual, kNotEqual},
{&RawMachineAssembler::Float64Equal, "Float64Equal", kEqual, kNotEqual,
kEqual},
{&RawMachineAssembler::Float64LessThan, "Float64LessThan",
kUnsignedLessThan, kUnsignedGreaterThanOrEqual},
kFloatLessThan, kFloatGreaterThanOrEqualOrUnordered, kFloatGreaterThan},
{&RawMachineAssembler::Float64LessThanOrEqual, "Float64LessThanOrEqual",
kUnsignedLessThanOrEqual, kUnsignedGreaterThan}};
kFloatLessThanOrEqual, kFloatGreaterThanOrUnordered,
kFloatGreaterThanOrEqual}};
} // namespace
@ -1629,25 +1637,26 @@ TEST_P(InstructionSelectorF64ComparisonTest, WithImmediateZeroOnRight) {
}
// For each parameterized float64 comparison, builds the comparison with the
// constant 0.0 as the left operand and Parameter(0) as the right operand,
// then checks that exactly one kArmVcmpF64 instruction is selected, that its
// second input is an immediate, and that its flags condition equals the
// comparison's commuted_flags_condition.
TEST_P(InstructionSelectorF64ComparisonTest, WithImmediateZeroOnLeft) {
const Comparison& cmp = GetParam();
StreamBuilder m(this, kMachInt32, kMachFloat64);
m.Return((m.*cmp.constructor)(m.Float64Constant(0.0), m.Parameter(0)));
Stream const s = m.Build();
ASSERT_EQ(1U, s.size());
EXPECT_EQ(kArmVcmpF64, s[0]->arch_opcode());
ASSERT_EQ(2U, s[0]->InputCount());
// The zero constant is expected in the second input slot as an immediate.
EXPECT_TRUE(s[0]->InputAt(1)->IsImmediate());
ASSERT_EQ(1U, s[0]->OutputCount());
EXPECT_EQ(kFlags_set, s[0]->flags_mode());
EXPECT_EQ(cmp.commuted_flags_condition, s[0]->flags_condition());
}
INSTANTIATE_TEST_CASE_P(InstructionSelectorTest,
InstructionSelectorF64ComparisonTest,
::testing::ValuesIn(kF64Comparisons));
// Builds Float64Equal with the constant 0.0 as the left operand and
// Parameter(0) as the right operand, then checks that exactly one
// kArmVcmpF64 instruction is selected with two inputs (the second being an
// immediate), one output, flags mode kFlags_set and flags condition kEqual.
TEST_F(InstructionSelectorTest, Float64EqualWithImmediateZeroOnLeft) {
StreamBuilder m(this, kMachInt32, kMachFloat64);
m.Return(m.Float64Equal(m.Float64Constant(0.0), m.Parameter(0)));
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
EXPECT_EQ(kArmVcmpF64, s[0]->arch_opcode());
EXPECT_EQ(2U, s[0]->InputCount());
// The zero constant is expected in the second input slot as an immediate.
EXPECT_TRUE(s[0]->InputAt(1)->IsImmediate());
EXPECT_EQ(1U, s[0]->OutputCount());
EXPECT_EQ(kFlags_set, s[0]->flags_mode());
EXPECT_EQ(kEqual, s[0]->flags_condition());
}
// -----------------------------------------------------------------------------
// Floating point arithmetic.