[deoptimizer, cleanup] Don't store values of single precision fp registers

Instead of storing the values of the single precision floating point registers,
get their values from the aliased double precision registers.

On arm64, this saves 184 bytes in the function generated for each
deoptimisation kind (552 bytes in total) and 128 bytes in the RegisterValues class.

Change-Id: I681ad46efbb610e94d1e45871e012d2c0a3cfa3b
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1669687
Commit-Queue: Martyn Capewell <martyn.capewell@arm.com>
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62961}
This commit is contained in:
Joey Gouly 2019-07-29 16:00:52 +01:00 committed by Commit Bot
parent a250aa77bb
commit 556e4859bb
5 changed files with 21 additions and 102 deletions

View File

@ -28,7 +28,6 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
RegList restored_regs = kJSCallerSaved | kCalleeSaved | ip.bit();
const int kDoubleRegsSize = kDoubleSize * DwVfpRegister::kNumRegisters;
const int kFloatRegsSize = kFloatSize * SwVfpRegister::kNumRegisters;
// Save all allocatable VFP registers before messing with them.
{
@ -48,9 +47,6 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
// small number and we need to use condition codes.
__ sub(sp, sp, Operand(16 * kDoubleSize), LeaveCC, eq);
__ vstm(db_w, sp, d0, d15);
// Push registers s0-s31 on the stack.
__ vstm(db_w, sp, s0, s31);
}
// Push all 16 registers (needed to populate FrameDescription::registers_).
@ -67,7 +63,7 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
}
const int kSavedRegistersAreaSize =
(kNumberOfRegisters * kPointerSize) + kDoubleRegsSize + kFloatRegsSize;
(kNumberOfRegisters * kPointerSize) + kDoubleRegsSize;
// The bailout id is passed in r10 by the caller.
__ mov(r2, r10);
@ -119,23 +115,11 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
for (int i = 0; i < config->num_allocatable_double_registers(); ++i) {
int code = config->GetAllocatableDoubleCode(i);
int dst_offset = code * kDoubleSize + double_regs_offset;
int src_offset =
code * kDoubleSize + kNumberOfRegisters * kPointerSize + kFloatRegsSize;
int src_offset = code * kDoubleSize + kNumberOfRegisters * kPointerSize;
__ vldr(d0, sp, src_offset);
__ vstr(d0, r1, dst_offset);
}
// Copy VFP registers to
// float_registers_[FloatRegister::kNumAllocatableRegisters]
int float_regs_offset = FrameDescription::float_registers_offset();
for (int i = 0; i < config->num_allocatable_float_registers(); ++i) {
int code = config->GetAllocatableFloatCode(i);
int dst_offset = code * kFloatSize + float_regs_offset;
int src_offset = code * kFloatSize + kNumberOfRegisters * kPointerSize;
__ ldr(r2, MemOperand(sp, src_offset));
__ str(r2, MemOperand(r1, dst_offset));
}
// Remove the saved registers from the stack.
__ add(sp, sp, Operand(kSavedRegistersAreaSize));

View File

@ -111,12 +111,6 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
DCHECK_EQ(saved_double_registers.Count() % 2, 0);
__ PushCPURegList(saved_double_registers);
CPURegList saved_float_registers(
CPURegister::kVRegister, kSRegSizeInBits,
RegisterConfiguration::Default()->allocatable_float_codes_mask());
DCHECK_EQ(saved_float_registers.Count() % 4, 0);
__ PushCPURegList(saved_float_registers);
// We save all the registers except sp, lr, platform register (x18) and the
// masm scratches.
CPURegList saved_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 28);
@ -134,13 +128,10 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
const int kSavedRegistersAreaSize =
(saved_registers.Count() * kXRegSize) +
(saved_double_registers.Count() * kDRegSize) +
(saved_float_registers.Count() * kSRegSize);
(saved_double_registers.Count() * kDRegSize);
// Floating point registers are saved on the stack above core registers.
const int kFloatRegistersOffset = saved_registers.Count() * kXRegSize;
const int kDoubleRegistersOffset =
kFloatRegistersOffset + saved_float_registers.Count() * kSRegSize;
const int kDoubleRegistersOffset = saved_registers.Count() * kXRegSize;
// The bailout id was passed by the caller in x26.
Register bailout_id = x2;
@ -194,12 +185,6 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
CopyRegListToFrame(masm, x1, FrameDescription::double_registers_offset(),
saved_double_registers, x2, x3, kDoubleRegistersOffset);
// Copy float registers to the input frame.
// TODO(arm): these are the lower 32-bits of the double registers stored
// above, so we shouldn't need to store them again.
CopyRegListToFrame(masm, x1, FrameDescription::float_registers_offset(),
saved_float_registers, w2, w3, kFloatRegistersOffset);
// Remove the saved registers from the stack.
DCHECK_EQ(kSavedRegistersAreaSize % kXRegSize, 0);
__ Drop(kSavedRegistersAreaSize / kXRegSize);

View File

@ -627,8 +627,16 @@ class RegisterValues {
}
Float32 GetFloatRegister(unsigned n) const {
DCHECK(n < arraysize(float_registers_));
return float_registers_[n];
DCHECK(n < arraysize(double_registers_));
if (kSimpleFPAliasing) {
return Float32::FromBits(
static_cast<uint32_t>(double_registers_[n].get_bits()));
} else {
const int kShift = n % 2 == 0 ? 0 : 32;
return Float32::FromBits(
static_cast<uint32_t>(double_registers_[n / 2].get_bits() >> kShift));
}
}
Float64 GetDoubleRegister(unsigned n) const {
@ -641,23 +649,10 @@ class RegisterValues {
registers_[n] = value;
}
void SetFloatRegister(unsigned n, Float32 value) {
DCHECK(n < arraysize(float_registers_));
float_registers_[n] = value;
}
void SetDoubleRegister(unsigned n, Float64 value) {
DCHECK(n < arraysize(double_registers_));
double_registers_[n] = value;
}
// Generated code is writing directly into the below arrays, make sure their
// element sizes fit what the machine instructions expect.
static_assert(sizeof(Float32) == kFloatSize, "size mismatch");
static_assert(sizeof(Float64) == kDoubleSize, "size mismatch");
intptr_t registers_[Register::kNumRegisters];
Float32 float_registers_[FloatRegister::kNumRegisters];
// Generated code writes directly into the following array, make sure the
// element size matches what the machine instructions expect.
static_assert(sizeof(Float64) == kDoubleSize, "size mismatch");
Float64 double_registers_[DoubleRegister::kNumRegisters];
};
@ -721,10 +716,6 @@ class FrameDescription {
register_values_.SetRegister(n, value);
}
void SetDoubleRegister(unsigned n, Float64 value) {
register_values_.SetDoubleRegister(n, value);
}
intptr_t GetTop() const { return top_; }
void SetTop(intptr_t top) { top_ = top; }
@ -755,10 +746,6 @@ class FrameDescription {
return OFFSET_OF(FrameDescription, register_values_.double_registers_);
}
static int float_registers_offset() {
return OFFSET_OF(FrameDescription, register_values_.float_registers_);
}
static int frame_size_offset() {
return offsetof(FrameDescription, frame_size_);
}

View File

@ -33,24 +33,14 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
__ movsd(Operand(esp, offset), xmm_reg);
}
STATIC_ASSERT(kFloatSize == kSystemPointerSize);
const int kFloatRegsSize = kFloatSize * XMMRegister::kNumRegisters;
__ AllocateStackSpace(kFloatRegsSize);
for (int i = 0; i < config->num_allocatable_float_registers(); ++i) {
int code = config->GetAllocatableFloatCode(i);
XMMRegister xmm_reg = XMMRegister::from_code(code);
int offset = code * kFloatSize;
__ movss(Operand(esp, offset), xmm_reg);
}
__ pushad();
ExternalReference c_entry_fp_address =
ExternalReference::Create(IsolateAddressId::kCEntryFPAddress, isolate);
__ mov(masm->ExternalReferenceAsOperand(c_entry_fp_address, esi), ebp);
const int kSavedRegistersAreaSize = kNumberOfRegisters * kSystemPointerSize +
kDoubleRegsSize + kFloatRegsSize;
const int kSavedRegistersAreaSize =
kNumberOfRegisters * kSystemPointerSize + kDoubleRegsSize;
// The bailout id is passed in ebx by the caller.
@ -94,13 +84,6 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
__ pop(Operand(esi, offset));
}
int float_regs_offset = FrameDescription::float_registers_offset();
// Fill in the float input registers.
for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
int dst_offset = i * kFloatSize + float_regs_offset;
__ pop(Operand(esi, dst_offset));
}
int double_regs_offset = FrameDescription::double_registers_offset();
// Fill in the double input registers.
for (int i = 0; i < config->num_allocatable_double_registers(); ++i) {

View File

@ -34,16 +34,6 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
__ Movsd(Operand(rsp, offset), xmm_reg);
}
const int kFloatRegsSize = kFloatSize * XMMRegister::kNumRegisters;
__ AllocateStackSpace(kFloatRegsSize);
for (int i = 0; i < config->num_allocatable_float_registers(); ++i) {
int code = config->GetAllocatableFloatCode(i);
XMMRegister xmm_reg = XMMRegister::from_code(code);
int offset = code * kFloatSize;
__ Movss(Operand(rsp, offset), xmm_reg);
}
// We push all registers onto the stack, even though we do not need
// to restore all later.
for (int i = 0; i < kNumberOfRegisters; i++) {
@ -51,8 +41,8 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
__ pushq(r);
}
const int kSavedRegistersAreaSize = kNumberOfRegisters * kSystemPointerSize +
kDoubleRegsSize + kFloatRegsSize;
const int kSavedRegistersAreaSize =
kNumberOfRegisters * kSystemPointerSize + kDoubleRegsSize;
__ Store(
ExternalReference::Create(IsolateAddressId::kCEntryFPAddress, isolate),
@ -112,16 +102,6 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
__ PopQuad(Operand(rbx, offset));
}
// Fill in the float input registers.
int float_regs_offset = FrameDescription::float_registers_offset();
for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
int src_offset = i * kFloatSize;
int dst_offset = i * kFloatSize + float_regs_offset;
__ movl(rcx, Operand(rsp, src_offset));
__ movl(Operand(rbx, dst_offset), rcx);
}
__ addq(rsp, Immediate(kFloatRegsSize));
// Fill in the double input registers.
int double_regs_offset = FrameDescription::double_registers_offset();
for (int i = 0; i < XMMRegister::kNumRegisters; i++) {