[turbofan] ARM: Reduce out-of-line NaN code size
Reduce the amount of code generated for OutOfLineLoadFloat* by computing sqrt(-1) rather than moving the NaN as an immediate. Add support for single-precision floating-point immediate moves to enable this. BUG= Review URL: https://codereview.chromium.org/1758003003 Cr-Commit-Position: refs/heads/master@{#34746}
This commit is contained in:
parent
d358357478
commit
2cd9877b6d
@ -2547,12 +2547,6 @@ void Assembler::vstm(BlockAddrMode am,
|
||||
}
|
||||
|
||||
|
||||
// Loads the single-precision immediate |imm| into |dst|: the raw 32-bit
// pattern of |imm| is first moved into ip and then transferred from ip to
// |dst|. Overwrites ip.
void Assembler::vmov(const SwVfpRegister dst, float imm) {
|
||||
mov(ip, Operand(bit_cast<int32_t>(imm)));
|
||||
vmov(dst, ip);
|
||||
}
|
||||
|
||||
|
||||
static void DoubleAsTwoUInt32(double d, uint32_t* lo, uint32_t* hi) {
|
||||
uint64_t i;
|
||||
memcpy(&i, &d, 8);
|
||||
@ -2564,7 +2558,7 @@ static void DoubleAsTwoUInt32(double d, uint32_t* lo, uint32_t* hi) {
|
||||
|
||||
// Only works for little endian floating point formats.
|
||||
// We don't support VFP on the mixed endian floating point platform.
|
||||
static bool FitsVMOVDoubleImmediate(double d, uint32_t *encoding) {
|
||||
static bool FitsVmovFPImmediate(double d, uint32_t* encoding) {
|
||||
DCHECK(CpuFeatures::IsSupported(VFP3));
|
||||
|
||||
// VMOV can accept an immediate of the form:
|
||||
@ -2593,12 +2587,12 @@ static bool FitsVMOVDoubleImmediate(double d, uint32_t *encoding) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Bits 62:55 must be all clear or all set.
|
||||
// Bits 61:54 must be all clear or all set.
|
||||
if (((hi & 0x3fc00000) != 0) && ((hi & 0x3fc00000) != 0x3fc00000)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Bit 63 must be NOT bit 62.
|
||||
// Bit 62 must be NOT bit 61.
|
||||
if (((hi ^ (hi << 1)) & (0x40000000)) == 0) {
|
||||
return false;
|
||||
}
|
||||
@ -2613,6 +2607,25 @@ static bool FitsVMOVDoubleImmediate(double d, uint32_t *encoding) {
|
||||
}
|
||||
|
||||
|
||||
// Loads the single-precision immediate |imm| into |dst|.
//
// If VFP3 is supported and FitsVmovFPImmediate() accepts the value (|imm| is
// implicitly widened to double for that check), a single vmov-immediate
// instruction is emitted with the returned encoding bits OR-ed in. Otherwise
// the raw bit pattern of |imm| is materialized in ip and moved from there
// into |dst|, which overwrites ip.
void Assembler::vmov(const SwVfpRegister dst, float imm) {
|
||||
uint32_t enc;
|
||||
if (CpuFeatures::IsSupported(VFP3) && FitsVmovFPImmediate(imm, &enc)) {
|
||||
// The float can be encoded in the instruction.
|
||||
//
|
||||
// Sd = immediate
|
||||
// Instruction details available in ARM DDI 0406C.b, A8-936.
|
||||
// cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | imm4H(19-16) |
|
||||
// Vd(15-12) | 101(11-9) | sz=0(8) | imm4L(3-0)
|
||||
int vd, d;
|
||||
dst.split_code(&vd, &d);
|
||||
emit(al | 0x1D * B23 | d * B22 | 0x3 * B20 | vd * B12 | 0x5 * B9 | enc);
|
||||
} else {
|
||||
// Fallback: go through the core register ip using the float's raw bits.
|
||||
mov(ip, Operand(bit_cast<int32_t>(imm)));
|
||||
vmov(dst, ip);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Assembler::vmov(const DwVfpRegister dst,
|
||||
double imm,
|
||||
const Register scratch) {
|
||||
@ -2623,7 +2636,7 @@ void Assembler::vmov(const DwVfpRegister dst,
|
||||
// pointer (pp) is valid.
|
||||
bool can_use_pool =
|
||||
!FLAG_enable_embedded_constant_pool || is_constant_pool_available();
|
||||
if (CpuFeatures::IsSupported(VFP3) && FitsVMOVDoubleImmediate(imm, &enc)) {
|
||||
if (CpuFeatures::IsSupported(VFP3) && FitsVmovFPImmediate(imm, &enc)) {
|
||||
// The double can be encoded in the instruction.
|
||||
//
|
||||
// Dd = immediate
|
||||
|
@ -654,7 +654,7 @@ class Instruction {
|
||||
inline bool HasH() const { return HValue() == 1; }
|
||||
inline bool HasLink() const { return LinkValue() == 1; }
|
||||
|
||||
// Decoding the double immediate in the vmov instruction.
|
||||
// Decode the double immediate from a vmov instruction.
|
||||
double DoubleImmedVmov() const;
|
||||
|
||||
// Instructions are read out of a code stream. The only way to get a
|
||||
|
@ -1430,7 +1430,7 @@ void Decoder::DecodeTypeVFP(Instruction* instr) {
|
||||
if (instr->SzValue() == 0x1) {
|
||||
Format(instr, "vmov'cond.f64 'Dd, 'd");
|
||||
} else {
|
||||
Unknown(instr); // Not used by V8.
|
||||
Format(instr, "vmov'cond.f32 'Sd, 'd");
|
||||
}
|
||||
} else if (((instr->Opc2Value() == 0x6)) && instr->Opc3Value() == 0x3) {
|
||||
// vrintz - round towards zero (truncate)
|
||||
|
@ -3256,7 +3256,7 @@ void Simulator::DecodeTypeVFP(Instruction* instr) {
|
||||
if (instr->SzValue() == 0x1) {
|
||||
set_d_register_from_double(vd, instr->DoubleImmedVmov());
|
||||
} else {
|
||||
UNREACHABLE(); // Not used by v8.
|
||||
set_s_register_from_float(d, instr->DoubleImmedVmov());
|
||||
}
|
||||
} else if (((instr->Opc2Value() == 0x6)) && (instr->Opc3Value() == 0x3)) {
|
||||
// vrintz - truncate
|
||||
|
@ -168,7 +168,9 @@ class OutOfLineLoadFloat32 final : public OutOfLineCode {
|
||||
: OutOfLineCode(gen), result_(result) {}
|
||||
|
||||
void Generate() final {
|
||||
__ vmov(result_, std::numeric_limits<float>::quiet_NaN());
|
||||
// Compute sqrtf(-1.0f), which results in a quiet single-precision NaN.
|
||||
__ vmov(result_, -1.0f);
|
||||
__ vsqrt(result_, result_);
|
||||
}
|
||||
|
||||
private:
|
||||
@ -182,7 +184,9 @@ class OutOfLineLoadFloat64 final : public OutOfLineCode {
|
||||
: OutOfLineCode(gen), result_(result) {}
|
||||
|
||||
void Generate() final {
|
||||
__ vmov(result_, std::numeric_limits<double>::quiet_NaN(), kScratchReg);
|
||||
// Compute sqrt(-1.0), which results in a quiet double-precision NaN.
|
||||
__ vmov(result_, -1.0);
|
||||
__ vsqrt(result_, result_);
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -232,6 +232,8 @@ TEST(4) {
|
||||
double j;
|
||||
double m;
|
||||
double n;
|
||||
float o;
|
||||
float p;
|
||||
float x;
|
||||
float y;
|
||||
} T;
|
||||
@ -314,6 +316,12 @@ TEST(4) {
|
||||
__ vneg(d0, d1);
|
||||
__ vstr(d0, r4, offsetof(T, n));
|
||||
|
||||
// Test vmov for single-precision immediates.
|
||||
__ vmov(s0, 0.25f);
|
||||
__ vstr(s0, r4, offsetof(T, o));
|
||||
__ vmov(s0, -16.0f);
|
||||
__ vstr(s0, r4, offsetof(T, p));
|
||||
|
||||
__ ldm(ia_w, sp, r4.bit() | fp.bit() | pc.bit());
|
||||
|
||||
CodeDesc desc;
|
||||
@ -341,6 +349,8 @@ TEST(4) {
|
||||
t.y = 9.0;
|
||||
Object* dummy = CALL_GENERATED_CODE(isolate, f, &t, 0, 0, 0, 0);
|
||||
USE(dummy);
|
||||
CHECK_EQ(-16.0f, t.p);
|
||||
CHECK_EQ(0.25f, t.o);
|
||||
CHECK_EQ(-123.456, t.n);
|
||||
CHECK_EQ(2718.2818, t.m);
|
||||
CHECK_EQ(2, t.i);
|
||||
|
@ -614,6 +614,11 @@ TEST(Vfp) {
|
||||
COMPARE(vmov(d2, -13.0),
|
||||
"eeba2b0a vmov.f64 d2, #-13");
|
||||
|
||||
COMPARE(vmov(s1, -1.0),
|
||||
"eeff0a00 vmov.f32 s1, #-1");
|
||||
COMPARE(vmov(s3, 13.0),
|
||||
"eef21a0a vmov.f32 s3, #13");
|
||||
|
||||
COMPARE(vmov(d0, VmovIndexLo, r0),
|
||||
"ee000b10 vmov.32 d0[0], r0");
|
||||
COMPARE(vmov(d0, VmovIndexHi, r0),
|
||||
|
Loading…
Reference in New Issue
Block a user