[turbofan] ARM: Reduce out-of-line NaN code size

Reduce the amount of code generated for OutOfLineLoadFloat* by computing
sqrt(-1) rather than move the NaN as an immediate. Add support for single
precision floating point immediate moves to enable this.

BUG=

Review URL: https://codereview.chromium.org/1758003003

Cr-Commit-Position: refs/heads/master@{#34746}
This commit is contained in:
martyn.capewell 2016-03-14 08:30:13 -07:00 committed by Commit bot
parent d358357478
commit 2cd9877b6d
7 changed files with 47 additions and 15 deletions

View File

@ -2547,12 +2547,6 @@ void Assembler::vstm(BlockAddrMode am,
}
void Assembler::vmov(const SwVfpRegister dst, float imm) {
mov(ip, Operand(bit_cast<int32_t>(imm)));
vmov(dst, ip);
}
static void DoubleAsTwoUInt32(double d, uint32_t* lo, uint32_t* hi) {
uint64_t i;
memcpy(&i, &d, 8);
@ -2564,7 +2558,7 @@ static void DoubleAsTwoUInt32(double d, uint32_t* lo, uint32_t* hi) {
// Only works for little endian floating point formats.
// We don't support VFP on the mixed endian floating point platform.
static bool FitsVMOVDoubleImmediate(double d, uint32_t *encoding) {
static bool FitsVmovFPImmediate(double d, uint32_t* encoding) {
DCHECK(CpuFeatures::IsSupported(VFP3));
// VMOV can accept an immediate of the form:
@ -2593,12 +2587,12 @@ static bool FitsVMOVDoubleImmediate(double d, uint32_t *encoding) {
return false;
}
// Bits 62:55 must be all clear or all set.
// Bits 61:54 must be all clear or all set.
if (((hi & 0x3fc00000) != 0) && ((hi & 0x3fc00000) != 0x3fc00000)) {
return false;
}
// Bit 63 must be NOT bit 62.
// Bit 62 must be NOT bit 61.
if (((hi ^ (hi << 1)) & (0x40000000)) == 0) {
return false;
}
@ -2613,6 +2607,25 @@ static bool FitsVMOVDoubleImmediate(double d, uint32_t *encoding) {
}
void Assembler::vmov(const SwVfpRegister dst, float imm) {
uint32_t enc;
if (CpuFeatures::IsSupported(VFP3) && FitsVmovFPImmediate(imm, &enc)) {
// The float can be encoded in the instruction.
//
// Sd = immediate
// Instruction details available in ARM DDI 0406C.b, A8-936.
// cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | imm4H(19-16) |
// Vd(15-12) | 101(11-9) | sz=0(8) | imm4L(3-0)
int vd, d;
dst.split_code(&vd, &d);
emit(al | 0x1D * B23 | d * B22 | 0x3 * B20 | vd * B12 | 0x5 * B9 | enc);
} else {
mov(ip, Operand(bit_cast<int32_t>(imm)));
vmov(dst, ip);
}
}
void Assembler::vmov(const DwVfpRegister dst,
double imm,
const Register scratch) {
@ -2623,7 +2636,7 @@ void Assembler::vmov(const DwVfpRegister dst,
// pointer (pp) is valid.
bool can_use_pool =
!FLAG_enable_embedded_constant_pool || is_constant_pool_available();
if (CpuFeatures::IsSupported(VFP3) && FitsVMOVDoubleImmediate(imm, &enc)) {
if (CpuFeatures::IsSupported(VFP3) && FitsVmovFPImmediate(imm, &enc)) {
// The double can be encoded in the instruction.
//
// Dd = immediate

View File

@ -654,7 +654,7 @@ class Instruction {
inline bool HasH() const { return HValue() == 1; }
inline bool HasLink() const { return LinkValue() == 1; }
// Decoding the double immediate in the vmov instruction.
// Decode the double immediate from a vmov instruction.
double DoubleImmedVmov() const;
// Instructions are read of out a code stream. The only way to get a

View File

@ -1430,7 +1430,7 @@ void Decoder::DecodeTypeVFP(Instruction* instr) {
if (instr->SzValue() == 0x1) {
Format(instr, "vmov'cond.f64 'Dd, 'd");
} else {
Unknown(instr); // Not used by V8.
Format(instr, "vmov'cond.f32 'Sd, 'd");
}
} else if (((instr->Opc2Value() == 0x6)) && instr->Opc3Value() == 0x3) {
// vrintz - round towards zero (truncate)

View File

@ -3256,7 +3256,7 @@ void Simulator::DecodeTypeVFP(Instruction* instr) {
if (instr->SzValue() == 0x1) {
set_d_register_from_double(vd, instr->DoubleImmedVmov());
} else {
UNREACHABLE(); // Not used by v8.
set_s_register_from_float(d, instr->DoubleImmedVmov());
}
} else if (((instr->Opc2Value() == 0x6)) && (instr->Opc3Value() == 0x3)) {
// vrintz - truncate

View File

@ -168,7 +168,9 @@ class OutOfLineLoadFloat32 final : public OutOfLineCode {
: OutOfLineCode(gen), result_(result) {}
void Generate() final {
__ vmov(result_, std::numeric_limits<float>::quiet_NaN());
// Compute sqrtf(-1.0f), which results in a quiet single-precision NaN.
__ vmov(result_, -1.0f);
__ vsqrt(result_, result_);
}
private:
@ -182,7 +184,9 @@ class OutOfLineLoadFloat64 final : public OutOfLineCode {
: OutOfLineCode(gen), result_(result) {}
void Generate() final {
__ vmov(result_, std::numeric_limits<double>::quiet_NaN(), kScratchReg);
// Compute sqrt(-1.0), which results in a quiet double-precision NaN.
__ vmov(result_, -1.0);
__ vsqrt(result_, result_);
}
private:

View File

@ -232,6 +232,8 @@ TEST(4) {
double j;
double m;
double n;
float o;
float p;
float x;
float y;
} T;
@ -314,6 +316,12 @@ TEST(4) {
__ vneg(d0, d1);
__ vstr(d0, r4, offsetof(T, n));
// Test vmov for single-precision immediates.
__ vmov(s0, 0.25f);
__ vstr(s0, r4, offsetof(T, o));
__ vmov(s0, -16.0f);
__ vstr(s0, r4, offsetof(T, p));
__ ldm(ia_w, sp, r4.bit() | fp.bit() | pc.bit());
CodeDesc desc;
@ -341,6 +349,8 @@ TEST(4) {
t.y = 9.0;
Object* dummy = CALL_GENERATED_CODE(isolate, f, &t, 0, 0, 0, 0);
USE(dummy);
CHECK_EQ(-16.0f, t.p);
CHECK_EQ(0.25f, t.o);
CHECK_EQ(-123.456, t.n);
CHECK_EQ(2718.2818, t.m);
CHECK_EQ(2, t.i);

View File

@ -614,6 +614,11 @@ TEST(Vfp) {
COMPARE(vmov(d2, -13.0),
"eeba2b0a vmov.f64 d2, #-13");
COMPARE(vmov(s1, -1.0),
"eeff0a00 vmov.f32 s1, #-1");
COMPARE(vmov(s3, 13.0),
"eef21a0a vmov.f32 s3, #13");
COMPARE(vmov(d0, VmovIndexLo, r0),
"ee000b10 vmov.32 d0[0], r0");
COMPARE(vmov(d0, VmovIndexHi, r0),