PPC/s390: [turbofan]: Support using push instructions for setting up tail call parameters
Port bd0d9e7d87
Original commit message:
This optimizes the passing of stack parameters in function calls.
For some architectures (ia32/x64), using pushes when possible instead
of bumping the stack and then storing parameters generates much
smaller code, and in some cases is faster (e.g. when a push of a memory
location can implement a memory-to-memory copy and thus elide an
intermediate load). On others (e.g. ARM), the benefit is smaller, where
it's only possible to elide direct stack pointer adjustment in certain cases
or combine multiple register stores into a single instruction in other limited
situations. On yet other platforms (ARM64, MIPS), there are no push instructions,
and this optimization isn't used at all.
Ideally, this mechanism would be used for both tail calls and normal calls,
but "normal" calls are currently pretty efficient, and tail calls are very
inefficient, so this CL sets the bar low for building a new mechanism to
handle parameter pushing that only needs to raise the bar on tail calls for now.
The key aspect of this change is that adjustment to the stack pointer
for tail calls (and perhaps later real calls) is an explicit step separate from
instruction selection and gap resolution, but aware of both, making it possible
to safely recognize gap moves that are actually pushes.
R=danno@chromium.org, joransiu@ca.ibm.com, jyan@ca.ibm.com, michael_dawson@ca.ibm.com, mbrandy@us.ibm.com
BUG=
LOG=N
Review-Url: https://codereview.chromium.org/2123983002
Cr-Commit-Position: refs/heads/master@{#37561}
This commit is contained in:
parent
a53bf2264d
commit
433572b8e0
@ -728,21 +728,7 @@ void CodeGenerator::AssembleDeconstructFrame() {
|
||||
__ LeaveFrame(StackFrame::MANUAL);
|
||||
}
|
||||
|
||||
void CodeGenerator::AssembleDeconstructActivationRecord(int stack_param_delta) {
|
||||
int sp_slot_delta = TailCallFrameStackSlotDelta(stack_param_delta);
|
||||
if (sp_slot_delta > 0) {
|
||||
__ Add(sp, sp, sp_slot_delta * kPointerSize, r0);
|
||||
}
|
||||
frame_access_state()->SetFrameAccessToDefault();
|
||||
}
|
||||
|
||||
|
||||
void CodeGenerator::AssemblePrepareTailCall(int stack_param_delta) {
|
||||
int sp_slot_delta = TailCallFrameStackSlotDelta(stack_param_delta);
|
||||
if (sp_slot_delta < 0) {
|
||||
__ Add(sp, sp, sp_slot_delta * kPointerSize, r0);
|
||||
frame_access_state()->IncreaseSPDelta(-sp_slot_delta);
|
||||
}
|
||||
void CodeGenerator::AssemblePrepareTailCall() {
|
||||
if (frame_access_state()->has_frame()) {
|
||||
__ RestoreFrameStateForTailCall();
|
||||
}
|
||||
@ -774,6 +760,116 @@ void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
|
||||
__ bind(&done);
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
// Emits one Push covering every register queued in |pending_pushes| (between
// one and three of them), records the pushed slot count in
// |frame_access_state| and leaves the queue empty. An empty queue emits no
// code; more than three queued registers is treated as unreachable.
void FlushPendingPushRegisters(MacroAssembler* masm,
                               FrameAccessState* frame_access_state,
                               ZoneVector<Register>* pending_pushes) {
  ZoneVector<Register>& queued = *pending_pushes;
  const auto queued_count = queued.size();
  if (queued_count == 1) {
    masm->Push(queued[0]);
  } else if (queued_count == 2) {
    masm->Push(queued[0], queued[1]);
  } else if (queued_count == 3) {
    masm->Push(queued[0], queued[1], queued[2]);
  } else if (queued_count != 0) {
    UNREACHABLE();
  }
  // Account for the slots that were just pushed, then reset the queue.
  frame_access_state->IncreaseSPDelta(queued_count);
  queued.resize(0);
}
|
||||
|
||||
// Queues |reg| for a later combined push. The queue is flushed right away
// once it holds three registers, or whenever |reg| is ip.
// NOTE(review): the ip case presumably exists because callers reuse ip as a
// scratch register for the next value, so it cannot stay queued - confirm.
void AddPendingPushRegister(MacroAssembler* masm,
                            FrameAccessState* frame_access_state,
                            ZoneVector<Register>* pending_pushes,
                            Register reg) {
  pending_pushes->push_back(reg);
  const bool queue_is_full = pending_pushes->size() == 3;
  if (queue_is_full || reg.is(ip)) {
    FlushPendingPushRegisters(masm, frame_access_state, pending_pushes);
  }
}
|
||||
|
||||
// Adjusts sp so that the number of slots above it becomes
// |new_slot_above_sp|. The current count is the SP-to-FP slot count reported
// by |state| plus StandardFrameConstants::kFixedSlotCountAboveFp.
//
// A positive difference is always applied; a negative one only when
// |allow_shrinkage| is set. In both cases -delta * kPointerSize is added to sp
// and the same delta is recorded in |state|. When |pending_pushes| is given,
// its queued registers are flushed before sp is modified.
void AdjustStackPointerForTailCall(
    MacroAssembler* masm, FrameAccessState* state, int new_slot_above_sp,
    ZoneVector<Register>* pending_pushes = nullptr,
    bool allow_shrinkage = true) {
  const int slots_above_sp_now =
      state->GetSPToFPSlotCount() +
      StandardFrameConstants::kFixedSlotCountAboveFp;
  const int stack_slot_delta = new_slot_above_sp - slots_above_sp_now;

  const bool must_grow = stack_slot_delta > 0;
  const bool may_shrink = allow_shrinkage && stack_slot_delta < 0;
  if (!must_grow && !may_shrink) return;

  // Emit any queued pushes before sp is changed underneath them.
  if (pending_pushes != nullptr) {
    FlushPendingPushRegisters(masm, state, pending_pushes);
  }
  masm->Add(sp, sp, -stack_slot_delta * kPointerSize, r0);
  state->IncreaseSPDelta(stack_slot_delta);
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Runs before the gap moves of a tail call are assembled.
//
// Moves reported by GetPushCompatibleMoves() (immediate and scalar pushes) are
// turned into real pushes, provided the last of them targets the slot directly
// below |first_unused_stack_slot|. Each such move is marked with Eliminate()
// once handled. Registers are queued and pushed in groups of up to three.
// Afterwards sp is grown (never shrunk) towards |first_unused_stack_slot|.
void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
                                              int first_unused_stack_slot) {
  CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
  ZoneVector<MoveOperands*> pushes(zone());
  GetPushCompatibleMoves(instr, flags, &pushes);

  const bool pushes_end_at_first_unused_slot =
      !pushes.empty() &&
      (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
       first_unused_stack_slot);
  if (pushes_end_at_first_unused_slot) {
    PPCOperandConverter g(this, instr);
    ZoneVector<Register> pending_pushes(zone());
    for (MoveOperands* move : pushes) {
      LocationOperand destination_location(
          LocationOperand::cast(move->destination()));
      InstructionOperand source(move->source());
      // Do the subtraction in int: mixing int with size_t would make it
      // unsigned and rely on wraparound when converting back to int.
      const int pending_count = static_cast<int>(pending_pushes.size());
      AdjustStackPointerForTailCall(
          masm(), frame_access_state(),
          destination_location.index() - pending_count, &pending_pushes);
      if (source.IsStackSlot()) {
        // Stack-to-stack: go through ip, which is flushed immediately by
        // AddPendingPushRegister.
        LocationOperand source_location(LocationOperand::cast(source));
        __ LoadP(ip, g.SlotToMemOperand(source_location.index()));
        AddPendingPushRegister(masm(), frame_access_state(), &pending_pushes,
                               ip);
      } else if (source.IsRegister()) {
        LocationOperand source_location(LocationOperand::cast(source));
        AddPendingPushRegister(masm(), frame_access_state(), &pending_pushes,
                               source_location.GetRegister());
      } else if (source.IsImmediate()) {
        // NOTE(review): nothing visible here loads the immediate value into
        // ip before it is pushed - confirm ip holds the constant at this
        // point, otherwise a stale register value ends up on the stack.
        AddPendingPushRegister(masm(), frame_access_state(), &pending_pushes,
                               ip);
      } else {
        // Pushes of non-scalar data types are not supported.
        UNIMPLEMENTED();
      }
      move->Eliminate();
    }
    FlushPendingPushRegisters(masm(), frame_access_state(), &pending_pushes);
  }
  // Only grow the stack here; shrinking is not allowed before the gap moves.
  AdjustStackPointerForTailCall(masm(), frame_access_state(),
                                first_unused_stack_slot, nullptr, false);
}
|
||||
|
||||
// Runs after the gap moves of a tail call: brings sp to
// |first_unused_stack_slot|, this time with shrinking allowed and with no
// pending push queue. |instr| is not used.
void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
                                             int first_unused_stack_slot) {
  // The last two arguments spell out the helper's default values.
  AdjustStackPointerForTailCall(masm(), frame_access_state(),
                                first_unused_stack_slot, nullptr, true);
}
|
||||
|
||||
|
||||
// Assembles an instruction after register allocation, producing machine code.
|
||||
CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
Instruction* instr) {
|
||||
@ -800,8 +896,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
}
|
||||
case kArchTailCallCodeObjectFromJSFunction:
|
||||
case kArchTailCallCodeObject: {
|
||||
int stack_param_delta = i.InputInt32(instr->InputCount() - 1);
|
||||
AssembleDeconstructActivationRecord(stack_param_delta);
|
||||
if (opcode == kArchTailCallCodeObjectFromJSFunction) {
|
||||
AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
|
||||
i.TempRegister(0), i.TempRegister(1),
|
||||
@ -820,14 +914,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
}
|
||||
DCHECK_EQ(LeaveRC, i.OutputRCBit());
|
||||
frame_access_state()->ClearSPDelta();
|
||||
frame_access_state()->SetFrameAccessToDefault();
|
||||
break;
|
||||
}
|
||||
case kArchTailCallAddress: {
|
||||
int stack_param_delta = i.InputInt32(instr->InputCount() - 1);
|
||||
AssembleDeconstructActivationRecord(stack_param_delta);
|
||||
CHECK(!instr->InputAt(0)->IsImmediate());
|
||||
__ Jump(i.InputRegister(0));
|
||||
frame_access_state()->ClearSPDelta();
|
||||
frame_access_state()->SetFrameAccessToDefault();
|
||||
break;
|
||||
}
|
||||
case kArchCallJSFunction: {
|
||||
@ -859,8 +953,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ cmp(cp, kScratchReg);
|
||||
__ Assert(eq, kWrongFunctionContext);
|
||||
}
|
||||
int stack_param_delta = i.InputInt32(instr->InputCount() - 1);
|
||||
AssembleDeconstructActivationRecord(stack_param_delta);
|
||||
if (opcode == kArchTailCallJSFunctionFromJSFunction) {
|
||||
AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
|
||||
i.TempRegister(0), i.TempRegister(1),
|
||||
@ -870,6 +962,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ Jump(ip);
|
||||
DCHECK_EQ(LeaveRC, i.OutputRCBit());
|
||||
frame_access_state()->ClearSPDelta();
|
||||
frame_access_state()->SetFrameAccessToDefault();
|
||||
break;
|
||||
}
|
||||
case kArchPrepareCallCFunction: {
|
||||
@ -880,7 +973,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
break;
|
||||
}
|
||||
case kArchPrepareTailCall:
|
||||
AssemblePrepareTailCall(i.InputInt32(instr->InputCount() - 1));
|
||||
AssemblePrepareTailCall();
|
||||
break;
|
||||
case kArchCallCFunction: {
|
||||
int const num_parameters = MiscField::decode(instr->opcode());
|
||||
|
@ -580,20 +580,7 @@ void CodeGenerator::AssembleDeconstructFrame() {
|
||||
__ LeaveFrame(StackFrame::MANUAL);
|
||||
}
|
||||
|
||||
void CodeGenerator::AssembleDeconstructActivationRecord(int stack_param_delta) {
|
||||
int sp_slot_delta = TailCallFrameStackSlotDelta(stack_param_delta);
|
||||
if (sp_slot_delta > 0) {
|
||||
__ AddP(sp, sp, Operand(sp_slot_delta * kPointerSize));
|
||||
}
|
||||
frame_access_state()->SetFrameAccessToDefault();
|
||||
}
|
||||
|
||||
void CodeGenerator::AssemblePrepareTailCall(int stack_param_delta) {
|
||||
int sp_slot_delta = TailCallFrameStackSlotDelta(stack_param_delta);
|
||||
if (sp_slot_delta < 0) {
|
||||
__ AddP(sp, sp, Operand(sp_slot_delta * kPointerSize));
|
||||
frame_access_state()->IncreaseSPDelta(-sp_slot_delta);
|
||||
}
|
||||
void CodeGenerator::AssemblePrepareTailCall() {
|
||||
if (frame_access_state()->has_frame()) {
|
||||
__ RestoreFrameStateForTailCall();
|
||||
}
|
||||
@ -625,6 +612,114 @@ void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
|
||||
__ bind(&done);
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
// Emits one Push covering every register queued in |pending_pushes| (between
// one and three of them), records the pushed slot count in
// |frame_access_state| and leaves the queue empty. An empty queue emits no
// code; more than three queued registers is treated as unreachable.
void FlushPendingPushRegisters(MacroAssembler* masm,
                               FrameAccessState* frame_access_state,
                               ZoneVector<Register>* pending_pushes) {
  ZoneVector<Register>& queued = *pending_pushes;
  const auto queued_count = queued.size();
  if (queued_count == 1) {
    masm->Push(queued[0]);
  } else if (queued_count == 2) {
    masm->Push(queued[0], queued[1]);
  } else if (queued_count == 3) {
    masm->Push(queued[0], queued[1], queued[2]);
  } else if (queued_count != 0) {
    UNREACHABLE();
  }
  // Account for the slots that were just pushed, then reset the queue.
  frame_access_state->IncreaseSPDelta(queued_count);
  queued.resize(0);
}
|
||||
|
||||
// Queues |reg| for a later combined push. The queue is flushed right away
// once it holds three registers, or whenever |reg| is ip.
// NOTE(review): the ip case presumably exists because callers reuse ip as a
// scratch register for the next value, so it cannot stay queued - confirm.
void AddPendingPushRegister(MacroAssembler* masm,
                            FrameAccessState* frame_access_state,
                            ZoneVector<Register>* pending_pushes,
                            Register reg) {
  pending_pushes->push_back(reg);
  const bool queue_is_full = pending_pushes->size() == 3;
  if (queue_is_full || reg.is(ip)) {
    FlushPendingPushRegisters(masm, frame_access_state, pending_pushes);
  }
}
|
||||
// Adjusts sp so that the number of slots above it becomes
// |new_slot_above_sp|. The current count is the SP-to-FP slot count reported
// by |state| plus StandardFrameConstants::kFixedSlotCountAboveFp.
//
// A positive difference is always applied; a negative one only when
// |allow_shrinkage| is set. In both cases -delta * kPointerSize is added to sp
// and the same delta is recorded in |state|. When |pending_pushes| is given,
// its queued registers are flushed before sp is modified.
void AdjustStackPointerForTailCall(
    MacroAssembler* masm, FrameAccessState* state, int new_slot_above_sp,
    ZoneVector<Register>* pending_pushes = nullptr,
    bool allow_shrinkage = true) {
  const int slots_above_sp_now =
      state->GetSPToFPSlotCount() +
      StandardFrameConstants::kFixedSlotCountAboveFp;
  const int stack_slot_delta = new_slot_above_sp - slots_above_sp_now;

  const bool must_grow = stack_slot_delta > 0;
  const bool may_shrink = allow_shrinkage && stack_slot_delta < 0;
  if (!must_grow && !may_shrink) return;

  // Emit any queued pushes before sp is changed underneath them.
  if (pending_pushes != nullptr) {
    FlushPendingPushRegisters(masm, state, pending_pushes);
  }
  masm->AddP(sp, sp, Operand(-stack_slot_delta * kPointerSize));
  state->IncreaseSPDelta(stack_slot_delta);
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Runs before the gap moves of a tail call are assembled.
//
// Moves reported by GetPushCompatibleMoves() (immediate and scalar pushes) are
// turned into real pushes, provided the last of them targets the slot directly
// below |first_unused_stack_slot|. Each such move is marked with Eliminate()
// once handled. Registers are queued and pushed in groups of up to three.
// Afterwards sp is grown (never shrunk) towards |first_unused_stack_slot|.
void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
                                              int first_unused_stack_slot) {
  CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
  ZoneVector<MoveOperands*> pushes(zone());
  GetPushCompatibleMoves(instr, flags, &pushes);

  const bool pushes_end_at_first_unused_slot =
      !pushes.empty() &&
      (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
       first_unused_stack_slot);
  if (pushes_end_at_first_unused_slot) {
    S390OperandConverter g(this, instr);
    ZoneVector<Register> pending_pushes(zone());
    for (MoveOperands* move : pushes) {
      LocationOperand destination_location(
          LocationOperand::cast(move->destination()));
      InstructionOperand source(move->source());
      // Do the subtraction in int: mixing int with size_t would make it
      // unsigned and rely on wraparound when converting back to int.
      const int pending_count = static_cast<int>(pending_pushes.size());
      AdjustStackPointerForTailCall(
          masm(), frame_access_state(),
          destination_location.index() - pending_count, &pending_pushes);
      if (source.IsStackSlot()) {
        // Stack-to-stack: go through ip, which is flushed immediately by
        // AddPendingPushRegister.
        LocationOperand source_location(LocationOperand::cast(source));
        __ LoadP(ip, g.SlotToMemOperand(source_location.index()));
        AddPendingPushRegister(masm(), frame_access_state(), &pending_pushes,
                               ip);
      } else if (source.IsRegister()) {
        LocationOperand source_location(LocationOperand::cast(source));
        AddPendingPushRegister(masm(), frame_access_state(), &pending_pushes,
                               source_location.GetRegister());
      } else if (source.IsImmediate()) {
        // NOTE(review): nothing visible here loads the immediate value into
        // ip before it is pushed - confirm ip holds the constant at this
        // point, otherwise a stale register value ends up on the stack.
        AddPendingPushRegister(masm(), frame_access_state(), &pending_pushes,
                               ip);
      } else {
        // Pushes of non-scalar data types are not supported.
        UNIMPLEMENTED();
      }
      move->Eliminate();
    }
    FlushPendingPushRegisters(masm(), frame_access_state(), &pending_pushes);
  }
  // Only grow the stack here; shrinking is not allowed before the gap moves.
  AdjustStackPointerForTailCall(masm(), frame_access_state(),
                                first_unused_stack_slot, nullptr, false);
}
|
||||
|
||||
// Runs after the gap moves of a tail call: brings sp to
// |first_unused_stack_slot|, this time with shrinking allowed and with no
// pending push queue. |instr| is not used.
void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
                                             int first_unused_stack_slot) {
  // The last two arguments spell out the helper's default values.
  AdjustStackPointerForTailCall(masm(), frame_access_state(),
                                first_unused_stack_slot, nullptr, true);
}
|
||||
|
||||
// Assembles an instruction after register allocation, producing machine code.
|
||||
CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
Instruction* instr) {
|
||||
@ -648,8 +743,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
}
|
||||
case kArchTailCallCodeObjectFromJSFunction:
|
||||
case kArchTailCallCodeObject: {
|
||||
int stack_param_delta = i.InputInt32(instr->InputCount() - 1);
|
||||
AssembleDeconstructActivationRecord(stack_param_delta);
|
||||
if (opcode == kArchTailCallCodeObjectFromJSFunction) {
|
||||
AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
|
||||
i.TempRegister(0), i.TempRegister(1),
|
||||
@ -667,14 +760,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
RelocInfo::CODE_TARGET);
|
||||
}
|
||||
frame_access_state()->ClearSPDelta();
|
||||
frame_access_state()->SetFrameAccessToDefault();
|
||||
break;
|
||||
}
|
||||
case kArchTailCallAddress: {
|
||||
int stack_param_delta = i.InputInt32(instr->InputCount() - 1);
|
||||
AssembleDeconstructActivationRecord(stack_param_delta);
|
||||
CHECK(!instr->InputAt(0)->IsImmediate());
|
||||
__ Jump(i.InputRegister(0));
|
||||
frame_access_state()->ClearSPDelta();
|
||||
frame_access_state()->SetFrameAccessToDefault();
|
||||
break;
|
||||
}
|
||||
case kArchCallJSFunction: {
|
||||
@ -703,8 +796,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ CmpP(cp, kScratchReg);
|
||||
__ Assert(eq, kWrongFunctionContext);
|
||||
}
|
||||
int stack_param_delta = i.InputInt32(instr->InputCount() - 1);
|
||||
AssembleDeconstructActivationRecord(stack_param_delta);
|
||||
if (opcode == kArchTailCallJSFunctionFromJSFunction) {
|
||||
AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
|
||||
i.TempRegister(0), i.TempRegister(1),
|
||||
@ -713,6 +804,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ LoadP(ip, FieldMemOperand(func, JSFunction::kCodeEntryOffset));
|
||||
__ Jump(ip);
|
||||
frame_access_state()->ClearSPDelta();
|
||||
frame_access_state()->SetFrameAccessToDefault();
|
||||
break;
|
||||
}
|
||||
case kArchPrepareCallCFunction: {
|
||||
@ -723,7 +815,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
break;
|
||||
}
|
||||
case kArchPrepareTailCall:
|
||||
AssemblePrepareTailCall(i.InputInt32(instr->InputCount() - 1));
|
||||
AssemblePrepareTailCall();
|
||||
break;
|
||||
case kArchCallCFunction: {
|
||||
int const num_parameters = MiscField::decode(instr->opcode());
|
||||
|
Loading…
Reference in New Issue
Block a user