From 9ebcfb41e26b728afbc8f64759dcbb5323d27378 Mon Sep 17 00:00:00 2001
From: "ulan@chromium.org"
Date: Mon, 25 Feb 2013 16:15:37 +0000
Subject: [PATCH] ARM: Make DoStoreKeyedFixedDoubleArray faster; don't allow
 conditional Vmov

This patch makes us generate faster code for DoStoreKeyedFixedDoubleArray
by using a branch rather than a conditional Vmov instruction.

Conditional VFP instructions are not a great idea in general, and it was
especially bad in this case because Vmov expands to a bunch of instructions.
For this reason, the patch also removes the 'cond' parameter from Vmov.

Thanks to Rodolph for pointing me to this!

BUG=none

Review URL: https://chromiumcodereview.appspot.com/12316096
Patch from Hans Wennborg .

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13722 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
---
 src/arm/assembler-arm.cc       | 17 ++++++++---------
 src/arm/assembler-arm.h        |  3 +--
 src/arm/lithium-codegen-arm.cc |  8 ++++++--
 src/arm/macro-assembler-arm.cc |  9 ++++-----
 src/arm/macro-assembler-arm.h  |  3 +--
 5 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/src/arm/assembler-arm.cc b/src/arm/assembler-arm.cc
index 0c9a6022fc..7cd0a1753e 100644
--- a/src/arm/assembler-arm.cc
+++ b/src/arm/assembler-arm.cc
@@ -2067,8 +2067,7 @@ static bool FitsVMOVDoubleImmediate(double d, uint32_t *encoding) {
 
 void Assembler::vmov(const DwVfpRegister dst,
                      double imm,
-                     const Register scratch,
-                     const Condition cond) {
+                     const Register scratch) {
   ASSERT(CpuFeatures::IsEnabled(VFP2));
 
   uint32_t enc;
@@ -2081,7 +2080,7 @@ void Assembler::vmov(const DwVfpRegister dst,
     // Vd(15-12) | 101(11-9) | sz=1(8) | imm4L(3-0)
     int vd, d;
     dst.split_code(&vd, &d);
-    emit(cond | 0x1D*B23 | d*B22 | 0x3*B20 | vd*B12 | 0x5*B9 | B8 | enc);
+    emit(al | 0x1D*B23 | d*B22 | 0x3*B20 | vd*B12 | 0x5*B9 | B8 | enc);
   } else if (FLAG_enable_vldr_imm) {
     // TODO(jfb) Temporarily turned off until we have constant blinding or
     //           some equivalent mitigation: an attacker can otherwise control
@@ -2099,7 +2098,7 @@ void Assembler::vmov(const DwVfpRegister dst,
     //           that's tricky because vldr has a limited reach. Furthermore
     //           it breaks load locality.
     RecordRelocInfo(imm);
-    vldr(dst, MemOperand(pc, 0), cond);
+    vldr(dst, MemOperand(pc, 0));
   } else {
     // Synthesise the double from ARM immediates.
     uint32_t lo, hi;
@@ -2110,27 +2109,27 @@
         // Move the low part of the double into the lower of the corresponsing S
         // registers of D register dst.
         mov(ip, Operand(lo));
-        vmov(dst.low(), ip, cond);
+        vmov(dst.low(), ip);
 
         // Move the high part of the double into the higher of the
         // corresponsing S registers of D register dst.
         mov(ip, Operand(hi));
-        vmov(dst.high(), ip, cond);
+        vmov(dst.high(), ip);
       } else {
         // D16-D31 does not have S registers, so move the low and high parts
         // directly to the D register using vmov.32.
        // Note: This may be slower, so we only do this when we have to.
         mov(ip, Operand(lo));
-        vmov(dst, VmovIndexLo, ip, cond);
+        vmov(dst, VmovIndexLo, ip);
         mov(ip, Operand(hi));
-        vmov(dst, VmovIndexHi, ip, cond);
+        vmov(dst, VmovIndexHi, ip);
       }
     } else {
       // Move the low and high parts of the double to a D register in one
       // instruction.
       mov(ip, Operand(lo));
       mov(scratch, Operand(hi));
-      vmov(dst, ip, scratch, cond);
+      vmov(dst, ip, scratch);
     }
   }
 }
diff --git a/src/arm/assembler-arm.h b/src/arm/assembler-arm.h
index acf4beb87b..b32c0f3c5a 100644
--- a/src/arm/assembler-arm.h
+++ b/src/arm/assembler-arm.h
@@ -1066,8 +1066,7 @@ class Assembler : public AssemblerBase {
 
   void vmov(const DwVfpRegister dst,
             double imm,
-            const Register scratch = no_reg,
-            const Condition cond = al);
+            const Register scratch = no_reg);
   void vmov(const SwVfpRegister dst,
             const SwVfpRegister src,
             const Condition cond = al);
diff --git a/src/arm/lithium-codegen-arm.cc b/src/arm/lithium-codegen-arm.cc
index bad66fe821..f140e39456 100644
--- a/src/arm/lithium-codegen-arm.cc
+++ b/src/arm/lithium-codegen-arm.cc
@@ -4472,10 +4472,14 @@ void LCodeGen::DoStoreKeyedFixedDoubleArray(LStoreKeyed* instr) {
 
   if (instr->NeedsCanonicalization()) {
     // Check for NaN. All NaNs must be canonicalized.
     __ VFPCompareAndSetFlags(value, value);
+    Label after_canonicalization;
+
+    // Only load canonical NaN if the comparison above set the overflow.
+    __ b(vc, &after_canonicalization);
     __ Vmov(value,
-            FixedDoubleArray::canonical_not_the_hole_nan_as_double(),
-            no_reg, vs);
+            FixedDoubleArray::canonical_not_the_hole_nan_as_double());
+
+    __ bind(&after_canonicalization);
   }
 
   __ vstr(value, scratch, instr->additional_index() << element_size_shift);
diff --git a/src/arm/macro-assembler-arm.cc b/src/arm/macro-assembler-arm.cc
index c1a3881c3e..befe2dd002 100644
--- a/src/arm/macro-assembler-arm.cc
+++ b/src/arm/macro-assembler-arm.cc
@@ -812,19 +812,18 @@ void MacroAssembler::VFPCompareAndLoadFlags(const DwVfpRegister src1,
 
 void MacroAssembler::Vmov(const DwVfpRegister dst,
                           const double imm,
-                          const Register scratch,
-                          const Condition cond) {
+                          const Register scratch) {
   ASSERT(CpuFeatures::IsEnabled(VFP2));
   static const DoubleRepresentation minus_zero(-0.0);
   static const DoubleRepresentation zero(0.0);
   DoubleRepresentation value(imm);
   // Handle special values first.
   if (value.bits == zero.bits) {
-    vmov(dst, kDoubleRegZero, cond);
+    vmov(dst, kDoubleRegZero);
   } else if (value.bits == minus_zero.bits) {
-    vneg(dst, kDoubleRegZero, cond);
+    vneg(dst, kDoubleRegZero);
   } else {
-    vmov(dst, imm, scratch, cond);
+    vmov(dst, imm, scratch);
   }
 }
 
diff --git a/src/arm/macro-assembler-arm.h b/src/arm/macro-assembler-arm.h
index dc4529bf7c..d26eda7f85 100644
--- a/src/arm/macro-assembler-arm.h
+++ b/src/arm/macro-assembler-arm.h
@@ -480,8 +480,7 @@ class MacroAssembler: public Assembler {
 
   void Vmov(const DwVfpRegister dst,
             const double imm,
-            const Register scratch = no_reg,
-            const Condition cond = al);
+            const Register scratch = no_reg);
 
   // Enter exit frame.
   // stack_space - extra stack space, used for alignment before call to C.
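
For readers tracing the change: below is a minimal standalone C++ sketch of
the semantics the patched DoStoreKeyedFixedDoubleArray sequence produces.
After "vcmp value, value", the V (overflow) flag is set exactly when the
comparison is unordered, i.e. when value is a NaN; the new code branches
over the canonicalization with "b vc" instead of executing a conditional
Vmov, so the common non-NaN path never pays for Vmov's multi-instruction
expansion. The helper names (CanonicalNaN, CanonicalizeForStore) and the
canonical NaN bit pattern used here are illustrative assumptions, not V8's
actual definitions.

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Illustrative stand-in for
// FixedDoubleArray::canonical_not_the_hole_nan_as_double(): one fixed quiet
// NaN bit pattern stored for every NaN (assumed value, not V8's constant).
static double CanonicalNaN() {
  const uint64_t kQuietNaNBits = 0x7FF8000000000000ULL;
  double d;
  std::memcpy(&d, &kQuietNaNBits, sizeof d);
  return d;
}

// Mirrors the generated sequence:
//   vcmp value, value      ; V flag set iff value is unordered (NaN)
//   b    vc, &after        ; ordinary doubles branch around the Vmov
//   Vmov value, canonical  ; unconditional, reached only for NaNs
// after:
static double CanonicalizeForStore(double value) {
  if (std::isnan(value)) {    // falling through the "b vc" branch
    return CanonicalNaN();    // the now-unconditional Vmov
  }
  return value;               // fast path: Vmov never executed
}

int main() {
  double stored = CanonicalizeForStore(std::nan(""));
  uint64_t bits;
  std::memcpy(&bits, &stored, sizeof bits);
  std::printf("canonicalized NaN bits: %016llx\n",
              static_cast<unsigned long long>(bits));
  return 0;
}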