ARM: Make DoStoreKeyedFixedDoubleArray faster; don't allow conditional Vmov
This patch makes us generate faster code for DoStoreKeyedFixedDoubleArray by using a branch rather than a conditional Vmov instruction. Conditional VFP instructions are not a great idea in general, and a conditional Vmov was especially bad in this case because Vmov expands to a bunch of instructions. For this reason, the patch also removes the 'cond' parameter from Vmov. Thanks to Rodolph for pointing me to this!

BUG=none
Review URL: https://chromiumcodereview.appspot.com/12316096
Patch from Hans Wennborg <hans@chromium.org>.

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13722 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
commit 9ebcfb41e2
parent 34c372d800
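The core of the change is the DoStoreKeyedFixedDoubleArray hunk in lithium-codegen-arm.cc below. As a rough before/after sketch of the code-generation pattern (names are taken from that hunk; the expansion described in the comment is paraphrased, not the literal emitted sequence):

    // Before: the canonical-NaN load was predicated on 'vs'. The NaN bit
    // pattern does not fit a VFP immediate, so Vmov expands into several
    // instructions (load the low/high words into ip, move them into the
    // D register), and under the old interface each of them carried the
    // condition.
    __ VFPCompareAndSetFlags(value, value);
    __ Vmov(value,
            FixedDoubleArray::canonical_not_the_hole_nan_as_double(),
            no_reg, vs);

    // After: a single forward branch skips an unconditional (and therefore
    // cheaper) Vmov on the common non-NaN path.
    __ VFPCompareAndSetFlags(value, value);
    Label after_canonicalization;
    __ b(vc, &after_canonicalization);
    __ Vmov(value,
            FixedDoubleArray::canonical_not_the_hole_nan_as_double());
    __ bind(&after_canonicalization);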
src/arm/assembler-arm.cc

@@ -2067,8 +2067,7 @@ static bool FitsVMOVDoubleImmediate(double d, uint32_t *encoding) {
 
 void Assembler::vmov(const DwVfpRegister dst,
                      double imm,
-                     const Register scratch,
-                     const Condition cond) {
+                     const Register scratch) {
   ASSERT(CpuFeatures::IsEnabled(VFP2));
 
   uint32_t enc;
@@ -2081,7 +2080,7 @@ void Assembler::vmov(const DwVfpRegister dst,
     // Vd(15-12) | 101(11-9) | sz=1(8) | imm4L(3-0)
     int vd, d;
     dst.split_code(&vd, &d);
-    emit(cond | 0x1D*B23 | d*B22 | 0x3*B20 | vd*B12 | 0x5*B9 | B8 | enc);
+    emit(al | 0x1D*B23 | d*B22 | 0x3*B20 | vd*B12 | 0x5*B9 | B8 | enc);
   } else if (FLAG_enable_vldr_imm) {
     // TODO(jfb) Temporarily turned off until we have constant blinding or
     // some equivalent mitigation: an attacker can otherwise control
@@ -2099,7 +2098,7 @@ void Assembler::vmov(const DwVfpRegister dst,
     // that's tricky because vldr has a limited reach. Furthermore
     // it breaks load locality.
     RecordRelocInfo(imm);
-    vldr(dst, MemOperand(pc, 0), cond);
+    vldr(dst, MemOperand(pc, 0));
   } else {
     // Synthesise the double from ARM immediates.
     uint32_t lo, hi;
@@ -2110,27 +2109,27 @@ void Assembler::vmov(const DwVfpRegister dst,
         // Move the low part of the double into the lower of the corresponsing S
         // registers of D register dst.
         mov(ip, Operand(lo));
-        vmov(dst.low(), ip, cond);
+        vmov(dst.low(), ip);
 
         // Move the high part of the double into the higher of the
         // corresponsing S registers of D register dst.
         mov(ip, Operand(hi));
-        vmov(dst.high(), ip, cond);
+        vmov(dst.high(), ip);
       } else {
         // D16-D31 does not have S registers, so move the low and high parts
         // directly to the D register using vmov.32.
         // Note: This may be slower, so we only do this when we have to.
         mov(ip, Operand(lo));
-        vmov(dst, VmovIndexLo, ip, cond);
+        vmov(dst, VmovIndexLo, ip);
         mov(ip, Operand(hi));
-        vmov(dst, VmovIndexHi, ip, cond);
+        vmov(dst, VmovIndexHi, ip);
       }
     } else {
       // Move the low and high parts of the double to a D register in one
      // instruction.
       mov(ip, Operand(lo));
       mov(scratch, Operand(hi));
-      vmov(dst, ip, scratch, cond);
+      vmov(dst, ip, scratch);
     }
   }
 }

src/arm/assembler-arm.h

@@ -1066,8 +1066,7 @@ class Assembler : public AssemblerBase {
 
   void vmov(const DwVfpRegister dst,
             double imm,
-            const Register scratch = no_reg,
-            const Condition cond = al);
+            const Register scratch = no_reg);
   void vmov(const SwVfpRegister dst,
             const SwVfpRegister src,
             const Condition cond = al);

src/arm/lithium-codegen-arm.cc

@@ -4472,10 +4472,14 @@ void LCodeGen::DoStoreKeyedFixedDoubleArray(LStoreKeyed* instr) {
   if (instr->NeedsCanonicalization()) {
     // Check for NaN. All NaNs must be canonicalized.
     __ VFPCompareAndSetFlags(value, value);
+    Label after_canonicalization;
+
     // Only load canonical NaN if the comparison above set the overflow.
+    __ b(vc, &after_canonicalization);
     __ Vmov(value,
-            FixedDoubleArray::canonical_not_the_hole_nan_as_double(),
-            no_reg, vs);
+            FixedDoubleArray::canonical_not_the_hole_nan_as_double());
+
+    __ bind(&after_canonicalization);
   }
 
   __ vstr(value, scratch, instr->additional_index() << element_size_shift);

src/arm/macro-assembler-arm.cc

@@ -812,19 +812,18 @@ void MacroAssembler::VFPCompareAndLoadFlags(const DwVfpRegister src1,
 
 void MacroAssembler::Vmov(const DwVfpRegister dst,
                           const double imm,
-                          const Register scratch,
-                          const Condition cond) {
+                          const Register scratch) {
   ASSERT(CpuFeatures::IsEnabled(VFP2));
   static const DoubleRepresentation minus_zero(-0.0);
   static const DoubleRepresentation zero(0.0);
   DoubleRepresentation value(imm);
   // Handle special values first.
   if (value.bits == zero.bits) {
-    vmov(dst, kDoubleRegZero, cond);
+    vmov(dst, kDoubleRegZero);
   } else if (value.bits == minus_zero.bits) {
-    vneg(dst, kDoubleRegZero, cond);
+    vneg(dst, kDoubleRegZero);
   } else {
-    vmov(dst, imm, scratch, cond);
+    vmov(dst, imm, scratch);
   }
 }
 

src/arm/macro-assembler-arm.h

@@ -480,8 +480,7 @@ class MacroAssembler: public Assembler {
 
   void Vmov(const DwVfpRegister dst,
             const double imm,
-            const Register scratch = no_reg,
-            const Condition cond = al);
+            const Register scratch = no_reg);
 
   // Enter exit frame.
   // stack_space - extra stack space, used for alignment before call to C.
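
For reference, a minimal usage sketch of the simplified interface, in the same '__' macro-assembler style as the codegen hunk above (the register, constant, and label name are hypothetical, purely for illustration): with the 'cond' parameter gone, a caller that wants the constant only under some condition now branches around the unconditional load.

    // Hypothetical example: load 1.0 into d7 only when the V flag is clear.
    Label skip;
    __ b(vs, &skip);   // conditionality is expressed as an explicit branch...
    __ Vmov(d7, 1.0);  // ...around a Vmov that is now always unconditional
    __ bind(&skip);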