[loong64][regalloc] Improve gap resolver algorithm

Port commit a77dd89e4c

Bug: v8:5210, chromium:1269989, chromium:1313647
Change-Id: Id6ac5031aff1a63ff4bac916a4f3ab6d9d6b97ba
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3686210
Auto-Submit: Liu Yu <liuyu@loongson.cn>
Reviewed-by: Zhao Jiazhong <zhaojiazhong-hf@loongson.cn>
Commit-Queue: Zhao Jiazhong <zhaojiazhong-hf@loongson.cn>
Cr-Commit-Position: refs/heads/main@{#80910}
This commit is contained in:
Liu Yu 2022-06-02 15:14:30 +08:00 committed by V8 LUCI CQ
parent d173de8953
commit 2b6cbda0b4
7 changed files with 320 additions and 4 deletions

View File

@ -167,7 +167,8 @@ void Assembler::AllocateAndInstallRequestedHeapObjects(Isolate* isolate) {
Assembler::Assembler(const AssemblerOptions& options,
std::unique_ptr<AssemblerBuffer> buffer)
: AssemblerBase(options, std::move(buffer)),
scratch_register_list_({t7, t6}) {
scratch_register_list_({t7, t6}),
scratch_fpregister_list_({f31}) {
reloc_info_writer.Reposition(buffer_start_ + buffer_->size(), pc_);
last_trampoline_pool_end_ = 0;
@ -2353,10 +2354,13 @@ void Assembler::set_target_value_at(Address pc, uint64_t target,
// Binds this scope to the assembler's general-purpose and floating-point
// scratch register lists and snapshots their current contents so that the
// destructor can put them back.
// NOTE(review): this hunk shows both the previous initializer list (ending in
// "{}") and the new one that adds the floating-point list.
UseScratchRegisterScope::UseScratchRegisterScope(Assembler* assembler)
: available_(assembler->GetScratchRegisterList()),
old_available_(*available_) {}
availablefp_(assembler->GetScratchFPRegisterList()),
old_available_(*available_),
old_availablefp_(*availablefp_) {}
// Puts both scratch register lists back to the contents that were saved when
// this scope was constructed.
UseScratchRegisterScope::~UseScratchRegisterScope() {
  *availablefp_ = old_availablefp_;
  *available_ = old_available_;
}
Register UseScratchRegisterScope::Acquire() {
@ -2364,10 +2368,19 @@ Register UseScratchRegisterScope::Acquire() {
return available_->PopFirst();
}
// Pops the first entry of the floating-point scratch list and hands it to the
// caller. The list pointer must have been set up by the constructor.
DoubleRegister UseScratchRegisterScope::AcquireFp() {
  DCHECK_NOT_NULL(availablefp_);
  DoubleRegister acquired = availablefp_->PopFirst();
  return acquired;
}
// Reports whether the general-purpose scratch list still holds a register.
bool UseScratchRegisterScope::hasAvailable() const {
  const bool exhausted = available_->is_empty();
  return !exhausted;
}
// Reports whether the floating-point scratch list still holds a register.
bool UseScratchRegisterScope::hasAvailableFp() const {
  const bool exhausted = availablefp_->is_empty();
  return !exhausted;
}
} // namespace internal
} // namespace v8

View File

@ -277,6 +277,10 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
// Gives callers mutable access to the general-purpose scratch register list.
RegList* GetScratchRegisterList() {
  return &scratch_register_list_;
}
// Gives callers mutable access to the floating-point scratch register list.
DoubleRegList* GetScratchFPRegisterList() {
  DoubleRegList* const fp_list = &scratch_fpregister_list_;
  return fp_list;
}
// ---------------------------------------------------------------------------
// Code generation.
@ -1065,6 +1069,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
// General-purpose registers that may currently be handed out as scratch
// registers (exposed through GetScratchRegisterList()).
RegList scratch_register_list_;
// Floating-point registers that may currently be handed out as scratch
// registers (exposed through GetScratchFPRegisterList()).
DoubleRegList scratch_fpregister_list_;
private:
void AllocateAndInstallRequestedHeapObjects(Isolate* isolate);
@ -1087,22 +1093,38 @@ class V8_EXPORT_PRIVATE V8_NODISCARD UseScratchRegisterScope {
// Restores the scratch register lists to their state at construction.
~UseScratchRegisterScope();
// Removes one general-purpose register from the available list and returns it.
Register Acquire();
// Removes one floating-point register from the available list and returns it.
DoubleRegister AcquireFp();
// True while the general-purpose available list is not empty.
bool hasAvailable() const;
// True while the floating-point available list is not empty.
bool hasAvailableFp() const;
// Merges |list| into the general-purpose available list.
void Include(const RegList& list) {
  *available_ |= list;
}
// Merges |list| into the floating-point available list.
void IncludeFp(const DoubleRegList& list) {
  *availablefp_ |= list;
}
// Clears the registers in |list| from the general-purpose available list.
void Exclude(const RegList& list) {
  available_->clear(list);
}
// Clears the registers in |list| from the floating-point available list.
void ExcludeFp(const DoubleRegList& list) {
  availablefp_->clear(list);
}
// Convenience overload: builds a list from |reg1| and |reg2| (which defaults
// to no_reg) and forwards it to the list-based Include().
void Include(const Register& reg1, const Register& reg2 = no_reg) {
  Include(RegList({reg1, reg2}));
}
// Convenience overload: builds a list from |reg1| and |reg2| (which defaults
// to no_dreg) and forwards it to the list-based IncludeFp().
void IncludeFp(const DoubleRegister& reg1,
               const DoubleRegister& reg2 = no_dreg) {
  IncludeFp(DoubleRegList({reg1, reg2}));
}
// Convenience overload: builds a list from |reg1| and |reg2| (which defaults
// to no_reg) and forwards it to the list-based Exclude().
void Exclude(const Register& reg1, const Register& reg2 = no_reg) {
  Exclude(RegList({reg1, reg2}));
}
// Convenience overload: builds a list from |reg1| and |reg2| (which defaults
// to no_dreg) and forwards it to the list-based ExcludeFp().
void ExcludeFp(const DoubleRegister& reg1,
               const DoubleRegister& reg2 = no_dreg) {
  ExcludeFp(DoubleRegList({reg1, reg2}));
}
private:
// Points at the assembler's general-purpose scratch register list.
RegList* available_;
// Points at the assembler's floating-point scratch register list.
DoubleRegList* availablefp_;
// Copy of *available_ taken at construction; written back by the destructor.
RegList old_available_;
// Copy of *availablefp_ taken at construction; written back by the destructor.
DoubleRegList old_availablefp_;
};
} // namespace internal

View File

@ -1083,6 +1083,19 @@ void TurboAssembler::GenerateSwitchTable(Register index, size_t case_count,
}
}
// Bookkeeping used while a cycle of parallel moves is being resolved. It
// records which scratch registers pending moves still need, the scratch scope
// held open for the duration of the cycle, and which scratch register (if any)
// currently holds the value that was moved out of the way.
struct MoveCycleState {
// List of scratch registers reserved for pending moves in a move cycle, and
// which should therefore not be used as a temporary location by
// {MoveToTempLocation}.
RegList scratch_regs;
// Floating-point counterpart of {scratch_regs}.
DoubleRegList scratch_fpregs;
// Available scratch registers during the move cycle resolution scope.
base::Optional<UseScratchRegisterScope> temps;
// Scratch register picked by {MoveToTempLocation}.
base::Optional<Register> scratch_reg;
// Floating-point scratch register picked by {MoveToTempLocation}; when both
// optionals are empty the temporary value lives on the stack instead.
base::Optional<DoubleRegister> scratch_fpreg;
};
#define ACCESS_MASM(masm) masm->
} // namespace internal

View File

@ -151,8 +151,7 @@ constexpr Register cp = s7;
constexpr Register kScratchReg = s3;
constexpr Register kScratchReg2 = s4;
constexpr DoubleRegister kScratchDoubleReg = f30;
constexpr DoubleRegister kScratchDoubleReg1 = f30;
constexpr DoubleRegister kScratchDoubleReg2 = f31;
constexpr DoubleRegister kScratchDoubleReg1 = f31;
// FPU zero reg is often used to hold 0.0, but it's not hardwired to 0.0.
constexpr DoubleRegister kDoubleRegZero = f29;

View File

@ -1290,6 +1290,17 @@ void TurboAssembler::GenerateSwitchTable(Register index, size_t case_count,
}
}
// Bookkeeping used while a cycle of parallel moves is being resolved. It
// records which scratch registers pending moves still need, the scratch scope
// held open for the duration of the cycle, and which scratch register (if any)
// currently holds the value that was moved out of the way.
struct MoveCycleState {
// List of scratch registers reserved for pending moves in a move cycle, and
// which should therefore not be used as a temporary location by
// {MoveToTempLocation}.
RegList scratch_regs;
// Available scratch registers during the move cycle resolution scope.
base::Optional<UseScratchRegisterScope> temps;
// Scratch register picked by {MoveToTempLocation}; when empty the temporary
// value lives on the stack instead.
base::Optional<Register> scratch_reg;
};
#define ACCESS_MASM(masm) masm->
} // namespace internal

View File

@ -5,6 +5,7 @@
#include "src/codegen/assembler-inl.h"
#include "src/codegen/callable.h"
#include "src/codegen/loong64/constants-loong64.h"
#include "src/codegen/machine-type.h"
#include "src/codegen/macro-assembler.h"
#include "src/codegen/optimized-compilation-info.h"
#include "src/compiler/backend/code-generator-impl.h"
@ -2407,6 +2408,156 @@ void CodeGenerator::FinishCode() {}
void CodeGenerator::PrepareForDeoptimizationExits(
ZoneDeque<DeoptimizationExit*>* exits) {}
// Saves the value of |source| in a temporary location so that a move cycle
// can be broken. The temporary is, in order of preference, a general-purpose
// scratch register, a floating-point scratch register, or a freshly pushed
// stack slot. The choice is recorded in {move_cycle_} and undone by
// {MoveTempLocationTo}.
void CodeGenerator::MoveToTempLocation(InstructionOperand* source) {
  // Must be kept in sync with {MoveTempLocationTo}.
  DCHECK(!source->IsImmediate());
  auto rep = LocationOperand::cast(source)->representation();
  move_cycle_.temps.emplace(tasm());
  auto& temps = *move_cycle_.temps;
  // Temporarily exclude the reserved scratch registers while we pick one to
  // resolve the move cycle. Re-include them immediately afterwards as they
  // might be needed for the move to the temp location.
  temps.Exclude(move_cycle_.scratch_regs);
  temps.ExcludeFp(move_cycle_.scratch_fpregs);
  if (!IsFloatingPoint(rep)) {
    if (temps.hasAvailable()) {
      Register scratch = move_cycle_.temps->Acquire();
      move_cycle_.scratch_reg.emplace(scratch);
    } else if (temps.hasAvailableFp()) {
      // Try to use an FP register if no GP register is available for non-FP
      // moves.
      FPURegister scratch = move_cycle_.temps->AcquireFp();
      move_cycle_.scratch_fpreg.emplace(scratch);
    }
  } else {
    DCHECK(temps.hasAvailableFp());
    FPURegister scratch = move_cycle_.temps->AcquireFp();
    move_cycle_.scratch_fpreg.emplace(scratch);
  }
  temps.Include(move_cycle_.scratch_regs);
  temps.IncludeFp(move_cycle_.scratch_fpregs);
  if (move_cycle_.scratch_reg.has_value()) {
    // A scratch register is available for this rep.
    AllocatedOperand scratch(LocationOperand::REGISTER, rep,
                             move_cycle_.scratch_reg->code());
    AssembleMove(source, &scratch);
  } else if (move_cycle_.scratch_fpreg.has_value()) {
    // A scratch fp register is available for this rep.
    if (!IsFloatingPoint(rep)) {
      // We use a DoubleRegister to hold a non-FP operand, so change the
      // representation to correctly interpret the InstructionOperand's code
      // as a double register, exactly as {MoveTempLocationTo} does.
      AllocatedOperand scratch(LocationOperand::REGISTER,
                               MachineRepresentation::kFloat64,
                               move_cycle_.scratch_fpreg->code());
      Loong64OperandConverter g(this, nullptr);
      if (source->IsStackSlot()) {
        __ Fld_d(g.ToDoubleRegister(&scratch), g.ToMemOperand(source));
      } else {
        DCHECK(source->IsRegister());
        __ movgr2fr_d(g.ToDoubleRegister(&scratch), g.ToRegister(source));
      }
    } else {
      AllocatedOperand scratch(LocationOperand::REGISTER, rep,
                               move_cycle_.scratch_fpreg->code());
      AssembleMove(source, &scratch);
    }
  } else {
    // The scratch registers are blocked by pending moves. Use the stack
    // instead.
    int new_slots = ElementSizeInPointers(rep);
    Loong64OperandConverter g(this, nullptr);
    if (source->IsRegister()) {
      __ Push(g.ToRegister(source));
    } else if (source->IsStackSlot()) {
      UseScratchRegisterScope temps2(tasm());
      Register scratch = temps2.Acquire();
      __ Ld_d(scratch, g.ToMemOperand(source));
      __ Push(scratch);
    } else {
      // No push instruction for this operand type. Bump the stack pointer and
      // assemble the move.
      int last_frame_slot_id =
          frame_access_state_->frame()->GetTotalFrameSlotCount() - 1;
      int sp_delta = frame_access_state_->sp_delta();
      int temp_slot = last_frame_slot_id + sp_delta + new_slots;
      __ Sub_d(sp, sp, Operand(new_slots * kSystemPointerSize));
      AllocatedOperand temp(LocationOperand::STACK_SLOT, rep, temp_slot);
      AssembleMove(source, &temp);
    }
    // Record the extra slots so later stack-slot addressing accounts for them.
    frame_access_state()->IncreaseSPDelta(new_slots);
  }
}
// Moves the value parked by {MoveToTempLocation} into |dest| and then resets
// {move_cycle_}. The temporary is a general-purpose scratch register, a
// floating-point scratch register, or the stack slots pushed earlier,
// depending on what {MoveToTempLocation} recorded.
void CodeGenerator::MoveTempLocationTo(InstructionOperand* dest,
                                       MachineRepresentation rep) {
  const bool parked_in_gp = move_cycle_.scratch_reg.has_value();
  const bool parked_in_fp = move_cycle_.scratch_fpreg.has_value();
  if (parked_in_gp) {
    AllocatedOperand parked(LocationOperand::REGISTER, rep,
                            move_cycle_.scratch_reg->code());
    AssembleMove(&parked, dest);
  } else if (parked_in_fp && IsFloatingPoint(rep)) {
    AllocatedOperand parked(LocationOperand::REGISTER, rep,
                            move_cycle_.scratch_fpreg->code());
    AssembleMove(&parked, dest);
  } else if (parked_in_fp) {
    // We used a DoubleRegister to move a non-FP operand, change the
    // representation to correctly interpret the InstructionOperand's code.
    AllocatedOperand parked(LocationOperand::REGISTER,
                            MachineRepresentation::kFloat64,
                            move_cycle_.scratch_fpreg->code());
    Loong64OperandConverter converter(this, nullptr);
    if (!dest->IsStackSlot()) {
      DCHECK(dest->IsRegister());
      __ movfr2gr_d(converter.ToRegister(dest),
                    converter.ToDoubleRegister(&parked));
    } else {
      __ Fst_d(converter.ToDoubleRegister(&parked),
               converter.ToMemOperand(dest));
    }
  } else {
    // The value was parked on the stack; undo the SP delta bookkeeping before
    // computing any stack-slot operand.
    const int slot_count = ElementSizeInPointers(rep);
    frame_access_state()->IncreaseSPDelta(-slot_count);
    Loong64OperandConverter converter(this, nullptr);
    if (dest->IsRegister()) {
      __ Pop(converter.ToRegister(dest));
    } else if (dest->IsStackSlot()) {
      UseScratchRegisterScope pop_scope(tasm());
      Register popped = pop_scope.Acquire();
      __ Pop(popped);
      __ St_d(popped, converter.ToMemOperand(dest));
    } else {
      // No pop instruction for this operand type: move out of the temporary
      // slot and then release it by bumping the stack pointer.
      const int last_frame_slot_id =
          frame_access_state_->frame()->GetTotalFrameSlotCount() - 1;
      const int sp_delta = frame_access_state_->sp_delta();
      const int temp_slot = last_frame_slot_id + sp_delta + slot_count;
      AllocatedOperand temp(LocationOperand::STACK_SLOT, rep, temp_slot);
      AssembleMove(&temp, dest);
      __ Add_d(sp, sp, Operand(slot_count * kSystemPointerSize));
    }
  }
  // Restore the default state to release the {UseScratchRegisterScope} and to
  // prepare for the next cycle.
  move_cycle_ = MoveCycleState();
}
// Records in {move_cycle_.scratch_regs} the scratch registers that assembling
// the pending |move| may need, so that {MoveToTempLocation} does not pick
// them as the temporary location for the cycle.
void CodeGenerator::SetPendingMove(MoveOperands* move) {
  InstructionOperand* src = &move->source();
  InstructionOperand* dst = &move->destination();
  UseScratchRegisterScope temps(tasm());
  if (src->IsConstant() || (src->IsStackSlot() && dst->IsStackSlot())) {
    Register temp = temps.Acquire();
    move_cycle_.scratch_regs.set(temp);
  }
  if (src->IsAnyStackSlot() || dst->IsAnyStackSlot()) {
    Loong64OperandConverter g(this, nullptr);
    // Decides whether accessing |op| in memory needs a scratch register to
    // form the address. Only stack-slot operands are inspected; the other
    // side of the move may be a register or a constant, for which there is no
    // memory operand to examine. The same predicate is applied to both
    // operands (the destination check previously lacked the negation on
    // hasIndexReg()).
    auto needs_scratch = [&g](InstructionOperand* op) {
      if (!op->IsAnyStackSlot()) return false;
      MemOperand mem = g.ToMemOperand(op);
      return (!is_int16(mem.offset()) || (mem.offset() & 0b11) != 0) &&
             (!is_int12(mem.offset()) && !mem.hasIndexReg());
    };
    if (needs_scratch(src) || needs_scratch(dst)) {
      Register temp = temps.Acquire();
      move_cycle_.scratch_regs.set(temp);
    }
  }
}
void CodeGenerator::AssembleMove(InstructionOperand* source,
InstructionOperand* destination) {
Loong64OperandConverter g(this, nullptr);

View File

@ -4,6 +4,7 @@
#include "src/codegen/assembler-inl.h"
#include "src/codegen/callable.h"
#include "src/codegen/machine-type.h"
#include "src/codegen/macro-assembler.h"
#include "src/codegen/mips64/constants-mips64.h"
#include "src/codegen/optimized-compilation-info.h"
@ -4351,6 +4352,112 @@ void CodeGenerator::FinishCode() {}
void CodeGenerator::PrepareForDeoptimizationExits(
ZoneDeque<DeoptimizationExit*>* exits) {}
// Saves the value of |source| in a temporary location so that a move cycle
// can be broken. A general-purpose scratch register is used for non-FP values
// when one is free; otherwise the value is placed on the stack. The choice is
// recorded in {move_cycle_} and undone by {MoveTempLocationTo}.
void CodeGenerator::MoveToTempLocation(InstructionOperand* source) {
  // Must be kept in sync with {MoveTempLocationTo}.
  DCHECK(!source->IsImmediate());
  const auto rep = LocationOperand::cast(source)->representation();
  move_cycle_.temps.emplace(tasm());
  UseScratchRegisterScope& scope = *move_cycle_.temps;
  // Temporarily exclude the reserved scratch registers while we pick one to
  // resolve the move cycle. Re-include them immediately afterwards as they
  // might be needed for the move to the temp location.
  scope.Exclude(move_cycle_.scratch_regs);
  if (!IsFloatingPoint(rep) && scope.hasAvailable()) {
    Register picked = scope.Acquire();
    move_cycle_.scratch_reg.emplace(picked);
  }
  scope.Include(move_cycle_.scratch_regs);

  if (move_cycle_.scratch_reg.has_value()) {
    // A scratch register is available for this rep.
    AllocatedOperand parked(LocationOperand::REGISTER, rep,
                            move_cycle_.scratch_reg->code());
    AssembleMove(source, &parked);
    return;
  }

  // The scratch registers are blocked by pending moves. Use the stack
  // instead.
  const int slot_count = ElementSizeInPointers(rep);
  MipsOperandConverter converter(this, nullptr);
  if (source->IsRegister()) {
    __ Push(converter.ToRegister(source));
  } else if (source->IsStackSlot()) {
    UseScratchRegisterScope push_scope(tasm());
    Register loaded = push_scope.Acquire();
    __ Ld(loaded, converter.ToMemOperand(source));
    __ Push(loaded);
  } else {
    // No push instruction for this operand type. Bump the stack pointer and
    // assemble the move.
    const int last_frame_slot_id =
        frame_access_state_->frame()->GetTotalFrameSlotCount() - 1;
    const int sp_delta = frame_access_state_->sp_delta();
    const int temp_slot = last_frame_slot_id + sp_delta + slot_count;
    __ Dsubu(sp, sp, Operand(slot_count * kSystemPointerSize));
    AllocatedOperand temp(LocationOperand::STACK_SLOT, rep, temp_slot);
    AssembleMove(source, &temp);
  }
  // Record the extra slots so later stack-slot addressing accounts for them.
  frame_access_state()->IncreaseSPDelta(slot_count);
}
// Moves the value parked by {MoveToTempLocation} into |dest| and then resets
// {move_cycle_}. The temporary is either the recorded general-purpose scratch
// register or the stack slots pushed earlier.
void CodeGenerator::MoveTempLocationTo(InstructionOperand* dest,
                                       MachineRepresentation rep) {
  if (!move_cycle_.scratch_reg.has_value()) {
    // The value was parked on the stack; undo the SP delta bookkeeping before
    // computing any stack-slot operand.
    const int slot_count = ElementSizeInPointers(rep);
    frame_access_state()->IncreaseSPDelta(-slot_count);
    MipsOperandConverter converter(this, nullptr);
    if (dest->IsRegister()) {
      __ Pop(converter.ToRegister(dest));
    } else if (dest->IsStackSlot()) {
      UseScratchRegisterScope pop_scope(tasm());
      Register popped = pop_scope.Acquire();
      __ Pop(popped);
      __ Sd(popped, converter.ToMemOperand(dest));
    } else {
      // No pop instruction for this operand type: move out of the temporary
      // slot and then release it by bumping the stack pointer.
      const int last_frame_slot_id =
          frame_access_state_->frame()->GetTotalFrameSlotCount() - 1;
      const int sp_delta = frame_access_state_->sp_delta();
      const int temp_slot = last_frame_slot_id + sp_delta + slot_count;
      AllocatedOperand temp(LocationOperand::STACK_SLOT, rep, temp_slot);
      AssembleMove(&temp, dest);
      __ Daddu(sp, sp, Operand(slot_count * kSystemPointerSize));
    }
  } else {
    AllocatedOperand parked(LocationOperand::REGISTER, rep,
                            move_cycle_.scratch_reg->code());
    AssembleMove(&parked, dest);
  }
  // Restore the default state to release the {UseScratchRegisterScope} and to
  // prepare for the next cycle.
  move_cycle_ = MoveCycleState();
}
// Records in {move_cycle_.scratch_regs} the scratch registers that assembling
// the pending |move| may need, so that {MoveToTempLocation} does not pick
// them as the temporary location for the cycle.
void CodeGenerator::SetPendingMove(MoveOperands* move) {
  InstructionOperand* src = &move->source();
  InstructionOperand* dst = &move->destination();
  UseScratchRegisterScope temps(tasm());
  if (src->IsConstant() && dst->IsFPLocationOperand()) {
    // A constant has no memory operand to inspect, so reserve a scratch
    // register unconditionally for this move.
    Register temp = temps.Acquire();
    move_cycle_.scratch_regs.set(temp);
  } else if (src->IsAnyStackSlot() || dst->IsAnyStackSlot()) {
    MipsOperandConverter g(this, nullptr);
    // Decides whether accessing |op| in memory needs a scratch register to
    // form the address. Only stack-slot operands are inspected; the other
    // side of the move may be a register, for which there is no memory
    // operand to examine.
    auto needs_scratch = [&g](InstructionOperand* op) {
      if (!op->IsAnyStackSlot()) return false;
      MemOperand mem = g.ToMemOperand(op);
      return !is_int16(mem.offset()) ||
             (((mem.offset() & 0b111) != 0) && !is_int16(mem.offset() + 4));
    };
    if (needs_scratch(src) || needs_scratch(dst)) {
      Register temp = temps.Acquire();
      move_cycle_.scratch_regs.set(temp);
    }
  }
}
void CodeGenerator::AssembleMove(InstructionOperand* source,
InstructionOperand* destination) {
MipsOperandConverter g(this, nullptr);