From 556e4859bb3d03676c08c752913c613988d684be Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Mon, 29 Jul 2019 16:00:52 +0100 Subject: [PATCH] [deoptimizer, cleanup] Don't store values of single precision fp registers Instead of storing the values of the single precision floating point registers, get their values from the aliased double precision registers. This saves, on arm64, 184 bytes per deoptimisation kind function (552 in total) and 128 bytes in the RegisterValues class. Change-Id: I681ad46efbb610e94d1e45871e012d2c0a3cfa3b Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1669687 Commit-Queue: Martyn Capewell Reviewed-by: Michael Starzinger Cr-Commit-Position: refs/heads/master@{#62961} --- src/deoptimizer/arm/deoptimizer-arm.cc | 20 ++--------- src/deoptimizer/arm64/deoptimizer-arm64.cc | 19 ++--------- src/deoptimizer/deoptimizer.h | 39 ++++++++-------------- src/deoptimizer/ia32/deoptimizer-ia32.cc | 21 ++---------- src/deoptimizer/x64/deoptimizer-x64.cc | 24 ++----------- 5 files changed, 21 insertions(+), 102 deletions(-) diff --git a/src/deoptimizer/arm/deoptimizer-arm.cc b/src/deoptimizer/arm/deoptimizer-arm.cc index 89e9988f9e..472985c2b5 100644 --- a/src/deoptimizer/arm/deoptimizer-arm.cc +++ b/src/deoptimizer/arm/deoptimizer-arm.cc @@ -28,7 +28,6 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm, RegList restored_regs = kJSCallerSaved | kCalleeSaved | ip.bit(); const int kDoubleRegsSize = kDoubleSize * DwVfpRegister::kNumRegisters; - const int kFloatRegsSize = kFloatSize * SwVfpRegister::kNumRegisters; // Save all allocatable VFP registers before messing with them. { @@ -48,9 +47,6 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm, // small number and we need to use condition codes. __ sub(sp, sp, Operand(16 * kDoubleSize), LeaveCC, eq); __ vstm(db_w, sp, d0, d15); - - // Push registers s0-s31 on the stack. 
- __ vstm(db_w, sp, s0, s31); } // Push all 16 registers (needed to populate FrameDescription::registers_). @@ -67,7 +63,7 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm, } const int kSavedRegistersAreaSize = - (kNumberOfRegisters * kPointerSize) + kDoubleRegsSize + kFloatRegsSize; + (kNumberOfRegisters * kPointerSize) + kDoubleRegsSize; // Get the bailout id is passed as r10 by the caller. __ mov(r2, r10); @@ -119,23 +115,11 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm, for (int i = 0; i < config->num_allocatable_double_registers(); ++i) { int code = config->GetAllocatableDoubleCode(i); int dst_offset = code * kDoubleSize + double_regs_offset; - int src_offset = - code * kDoubleSize + kNumberOfRegisters * kPointerSize + kFloatRegsSize; + int src_offset = code * kDoubleSize + kNumberOfRegisters * kPointerSize; __ vldr(d0, sp, src_offset); __ vstr(d0, r1, dst_offset); } - // Copy VFP registers to - // float_registers_[FloatRegister::kNumAllocatableRegisters] - int float_regs_offset = FrameDescription::float_registers_offset(); - for (int i = 0; i < config->num_allocatable_float_registers(); ++i) { - int code = config->GetAllocatableFloatCode(i); - int dst_offset = code * kFloatSize + float_regs_offset; - int src_offset = code * kFloatSize + kNumberOfRegisters * kPointerSize; - __ ldr(r2, MemOperand(sp, src_offset)); - __ str(r2, MemOperand(r1, dst_offset)); - } - // Remove the saved registers from the stack. 
__ add(sp, sp, Operand(kSavedRegistersAreaSize)); diff --git a/src/deoptimizer/arm64/deoptimizer-arm64.cc b/src/deoptimizer/arm64/deoptimizer-arm64.cc index a96b1263ab..77a1ab5009 100644 --- a/src/deoptimizer/arm64/deoptimizer-arm64.cc +++ b/src/deoptimizer/arm64/deoptimizer-arm64.cc @@ -111,12 +111,6 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm, DCHECK_EQ(saved_double_registers.Count() % 2, 0); __ PushCPURegList(saved_double_registers); - CPURegList saved_float_registers( - CPURegister::kVRegister, kSRegSizeInBits, - RegisterConfiguration::Default()->allocatable_float_codes_mask()); - DCHECK_EQ(saved_float_registers.Count() % 4, 0); - __ PushCPURegList(saved_float_registers); - // We save all the registers except sp, lr, platform register (x18) and the // masm scratches. CPURegList saved_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 28); @@ -134,13 +128,10 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm, const int kSavedRegistersAreaSize = (saved_registers.Count() * kXRegSize) + - (saved_double_registers.Count() * kDRegSize) + - (saved_float_registers.Count() * kSRegSize); + (saved_double_registers.Count() * kDRegSize); // Floating point registers are saved on the stack above core registers. - const int kFloatRegistersOffset = saved_registers.Count() * kXRegSize; - const int kDoubleRegistersOffset = - kFloatRegistersOffset + saved_float_registers.Count() * kSRegSize; + const int kDoubleRegistersOffset = saved_registers.Count() * kXRegSize; // The bailout id was passed by the caller in x26. Register bailout_id = x2; @@ -194,12 +185,6 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm, CopyRegListToFrame(masm, x1, FrameDescription::double_registers_offset(), saved_double_registers, x2, x3, kDoubleRegistersOffset); - // Copy float registers to the input frame. - // TODO(arm): these are the lower 32-bits of the double registers stored - // above, so we shouldn't need to store them again. 
- CopyRegListToFrame(masm, x1, FrameDescription::float_registers_offset(), - saved_float_registers, w2, w3, kFloatRegistersOffset); - // Remove the saved registers from the stack. DCHECK_EQ(kSavedRegistersAreaSize % kXRegSize, 0); __ Drop(kSavedRegistersAreaSize / kXRegSize); diff --git a/src/deoptimizer/deoptimizer.h b/src/deoptimizer/deoptimizer.h index a2471247ef..145d807cf8 100644 --- a/src/deoptimizer/deoptimizer.h +++ b/src/deoptimizer/deoptimizer.h @@ -627,8 +627,16 @@ class RegisterValues { } Float32 GetFloatRegister(unsigned n) const { - DCHECK(n < arraysize(float_registers_)); - return float_registers_[n]; + DCHECK(n < arraysize(double_registers_)); + if (kSimpleFPAliasing) { + return Float32::FromBits( + static_cast<uint32_t>(double_registers_[n].get_bits())); + } else { + const int kShift = n % 2 == 0 ? 0 : 32; + + return Float32::FromBits( + static_cast<uint32_t>(double_registers_[n / 2].get_bits() >> kShift)); + } } Float64 GetDoubleRegister(unsigned n) const { @@ -641,23 +649,10 @@ class RegisterValues { registers_[n] = value; } - void SetFloatRegister(unsigned n, Float32 value) { - DCHECK(n < arraysize(float_registers_)); - float_registers_[n] = value; - } - - void SetDoubleRegister(unsigned n, Float64 value) { - DCHECK(n < arraysize(double_registers_)); - double_registers_[n] = value; - } - - // Generated code is writing directly into the below arrays, make sure their - // element sizes fit what the machine instructions expect. - static_assert(sizeof(Float32) == kFloatSize, "size mismatch"); - static_assert(sizeof(Float64) == kDoubleSize, "size mismatch"); - intptr_t registers_[Register::kNumRegisters]; - Float32 float_registers_[FloatRegister::kNumRegisters]; + // Generated code writes directly into the following array, make sure the + // element size matches what the machine instructions expect. 
+ static_assert(sizeof(Float64) == kDoubleSize, "size mismatch"); Float64 double_registers_[DoubleRegister::kNumRegisters]; }; @@ -721,10 +716,6 @@ class FrameDescription { register_values_.SetRegister(n, value); } - void SetDoubleRegister(unsigned n, Float64 value) { - register_values_.SetDoubleRegister(n, value); - } - intptr_t GetTop() const { return top_; } void SetTop(intptr_t top) { top_ = top; } @@ -755,10 +746,6 @@ class FrameDescription { return OFFSET_OF(FrameDescription, register_values_.double_registers_); } - static int float_registers_offset() { - return OFFSET_OF(FrameDescription, register_values_.float_registers_); - } - static int frame_size_offset() { return offsetof(FrameDescription, frame_size_); } diff --git a/src/deoptimizer/ia32/deoptimizer-ia32.cc b/src/deoptimizer/ia32/deoptimizer-ia32.cc index f40ff562be..ab1d2ef481 100644 --- a/src/deoptimizer/ia32/deoptimizer-ia32.cc +++ b/src/deoptimizer/ia32/deoptimizer-ia32.cc @@ -33,24 +33,14 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm, __ movsd(Operand(esp, offset), xmm_reg); } - STATIC_ASSERT(kFloatSize == kSystemPointerSize); - const int kFloatRegsSize = kFloatSize * XMMRegister::kNumRegisters; - __ AllocateStackSpace(kFloatRegsSize); - for (int i = 0; i < config->num_allocatable_float_registers(); ++i) { - int code = config->GetAllocatableFloatCode(i); - XMMRegister xmm_reg = XMMRegister::from_code(code); - int offset = code * kFloatSize; - __ movss(Operand(esp, offset), xmm_reg); - } - __ pushad(); ExternalReference c_entry_fp_address = ExternalReference::Create(IsolateAddressId::kCEntryFPAddress, isolate); __ mov(masm->ExternalReferenceAsOperand(c_entry_fp_address, esi), ebp); - const int kSavedRegistersAreaSize = kNumberOfRegisters * kSystemPointerSize + - kDoubleRegsSize + kFloatRegsSize; + const int kSavedRegistersAreaSize = + kNumberOfRegisters * kSystemPointerSize + kDoubleRegsSize; // The bailout id is passed in ebx by the caller. 
@@ -94,13 +84,6 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm, __ pop(Operand(esi, offset)); } - int float_regs_offset = FrameDescription::float_registers_offset(); - // Fill in the float input registers. - for (int i = 0; i < XMMRegister::kNumRegisters; i++) { - int dst_offset = i * kFloatSize + float_regs_offset; - __ pop(Operand(esi, dst_offset)); - } - int double_regs_offset = FrameDescription::double_registers_offset(); // Fill in the double input registers. for (int i = 0; i < config->num_allocatable_double_registers(); ++i) { diff --git a/src/deoptimizer/x64/deoptimizer-x64.cc b/src/deoptimizer/x64/deoptimizer-x64.cc index cfdd6c9ef1..31680c4690 100644 --- a/src/deoptimizer/x64/deoptimizer-x64.cc +++ b/src/deoptimizer/x64/deoptimizer-x64.cc @@ -34,16 +34,6 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm, __ Movsd(Operand(rsp, offset), xmm_reg); } - const int kFloatRegsSize = kFloatSize * XMMRegister::kNumRegisters; - __ AllocateStackSpace(kFloatRegsSize); - - for (int i = 0; i < config->num_allocatable_float_registers(); ++i) { - int code = config->GetAllocatableFloatCode(i); - XMMRegister xmm_reg = XMMRegister::from_code(code); - int offset = code * kFloatSize; - __ Movss(Operand(rsp, offset), xmm_reg); - } - // We push all registers onto the stack, even though we do not need // to restore all later. for (int i = 0; i < kNumberOfRegisters; i++) { @@ -51,8 +41,8 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm, __ pushq(r); } - const int kSavedRegistersAreaSize = kNumberOfRegisters * kSystemPointerSize + - kDoubleRegsSize + kFloatRegsSize; + const int kSavedRegistersAreaSize = + kNumberOfRegisters * kSystemPointerSize + kDoubleRegsSize; __ Store( ExternalReference::Create(IsolateAddressId::kCEntryFPAddress, isolate), @@ -112,16 +102,6 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm, __ PopQuad(Operand(rbx, offset)); } - // Fill in the float input registers. 
- int float_regs_offset = FrameDescription::float_registers_offset(); - for (int i = 0; i < XMMRegister::kNumRegisters; i++) { - int src_offset = i * kFloatSize; - int dst_offset = i * kFloatSize + float_regs_offset; - __ movl(rcx, Operand(rsp, src_offset)); - __ movl(Operand(rbx, dst_offset), rcx); - } - __ addq(rsp, Immediate(kFloatRegsSize)); - // Fill in the double input registers. int double_regs_offset = FrameDescription::double_registers_offset(); for (int i = 0; i < XMMRegister::kNumRegisters; i++) {