PPC/s390: [arm][turbofan] Implement on-stack returns.
Port 9c7b6e1e8a
Original Commit Message:
This is the implementation of crrev.com/c/766371 for arm.
Original description:
Add the ability to return (multiple) return values on the stack:
- Extend stack frames with a new buffer region for return slots.
This region is located at the end of a caller's frame such that
its slots can be indexed as caller frame slots in a callee
(located beyond its parameters) and assigned return values.
- Adjust stack frame construction and deconstruction accordingly.
- Extend linkage computation to support register plus stack returns.
- Reserve return slots in caller frame when respective calls occur.
- Introduce and generate architecture instructions ('peek') for
reading back results from return slots in the caller.
- Aggressive tests.
- Some minor clean-up.
R=ahaas@chromium.org, joransiu@ca.ibm.com, michael_dawson@ca.ibm.com, miladfar@ca.ibm.com
BUG=
LOG=N
Change-Id: I83df1af8c49f6d6c5b529db599fce61a1da2490d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1496549
Reviewed-by: Andreas Haas <ahaas@chromium.org>
Commit-Queue: Junliang Yan <jyan@ca.ibm.com>
Cr-Commit-Position: refs/heads/master@{#60032}
This commit is contained in:
parent
15925e5cc6
commit
7103c19432
@ -1155,6 +1155,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ and_(i.OutputRegister(), i.InputRegister(0),
|
||||
kSpeculationPoisonRegister);
|
||||
break;
|
||||
case kPPC_Peek: {
|
||||
// The incoming value is 0-based, but we need a 1-based value.
|
||||
int reverse_slot = i.InputInt32(0) + 1;
|
||||
int offset =
|
||||
FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
|
||||
if (instr->OutputAt(0)->IsFPRegister()) {
|
||||
LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
|
||||
if (op->representation() == MachineRepresentation::kFloat64) {
|
||||
__ LoadDouble(i.OutputDoubleRegister(), MemOperand(fp, offset), r0);
|
||||
} else {
|
||||
DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
|
||||
__ LoadFloat32(i.OutputFloatRegister(), MemOperand(fp, offset), r0);
|
||||
}
|
||||
} else {
|
||||
__ LoadP(i.OutputRegister(), MemOperand(fp, offset), r0);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kPPC_And:
|
||||
if (HasRegisterInput(instr, 1)) {
|
||||
__ and_(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
|
||||
|
@ -12,6 +12,7 @@ namespace compiler {
|
||||
// PPC-specific opcodes that specify which assembly sequence to emit.
|
||||
// Most opcodes specify a single instruction.
|
||||
#define TARGET_ARCH_OPCODE_LIST(V) \
|
||||
V(PPC_Peek) \
|
||||
V(PPC_And) \
|
||||
V(PPC_AndComplement) \
|
||||
V(PPC_Or) \
|
||||
|
@ -121,6 +121,11 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kPPC_LoadWord64:
|
||||
case kPPC_LoadFloat32:
|
||||
case kPPC_LoadDouble:
|
||||
case kPPC_AtomicLoadUint8:
|
||||
case kPPC_AtomicLoadUint16:
|
||||
case kPPC_AtomicLoadWord32:
|
||||
case kPPC_AtomicLoadWord64:
|
||||
case kPPC_Peek:
|
||||
return kIsLoadOperation;
|
||||
|
||||
case kPPC_StoreWord8:
|
||||
@ -134,12 +139,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kPPC_StoreToStackSlot:
|
||||
return kHasSideEffect;
|
||||
|
||||
case kPPC_AtomicLoadUint8:
|
||||
case kPPC_AtomicLoadUint16:
|
||||
case kPPC_AtomicLoadWord32:
|
||||
case kPPC_AtomicLoadWord64:
|
||||
return kIsLoadOperation;
|
||||
|
||||
case kPPC_AtomicStoreUint8:
|
||||
case kPPC_AtomicStoreUint16:
|
||||
case kPPC_AtomicStoreWord32:
|
||||
|
@ -2232,7 +2232,24 @@ void InstructionSelector::VisitF32x4ReplaceLane(Node* node) { UNIMPLEMENTED(); }
|
||||
// Emits one kPPC_Peek instruction for every call result that is returned in a
// caller frame slot and has a node attached.
//
// results:         the call's outputs; entries whose location is not a caller
//                  frame slot are ignored here.
// call_descriptor: only consulted by the DCHECK below, which asserts that the
//                  call is not a C function call.
// node:            not referenced in this body.
void InstructionSelector::EmitPrepareResults(
|
||||
ZoneVector<PushParameter>* results, const CallDescriptor* call_descriptor,
|
||||
Node* node) {
|
||||
// Results located in caller frame slots are read back with kPPC_Peek.
|
||||
PPCOperandGenerator g(this);
|
||||
|
||||
// 0-based slot index handed to Peek as an immediate operand. It advances by
// the size (in pointers) of every caller-frame-slot output visited.
int reverse_slot = 0;
|
||||
for (PushParameter output : *results) {
|
||||
if (!output.location.IsCallerFrameSlot()) continue;
|
||||
// Skip any alignment holes in nodes.
// An output without a node gets no Peek, but its slots are still counted
// when reverse_slot is advanced at the bottom of the loop.
|
||||
if (output.node != nullptr) {
|
||||
DCHECK(!call_descriptor->IsCFunctionCall());
|
||||
// Record the floating-point representation of the node before defining
// it as the Peek output; other location types are left unmarked.
if (output.location.GetType() == MachineType::Float32()) {
|
||||
MarkAsFloat32(output.node);
|
||||
} else if (output.location.GetType() == MachineType::Float64()) {
|
||||
MarkAsFloat64(output.node);
|
||||
}
|
||||
Emit(kPPC_Peek, g.DefineAsRegister(output.node),
|
||||
g.UseImmediate(reverse_slot));
|
||||
}
|
||||
reverse_slot += output.location.GetSizeInPointers();
|
||||
}
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitF32x4Add(Node* node) { UNIMPLEMENTED(); }
|
||||
|
@ -1637,6 +1637,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
DCHECK_EQ(i.OutputRegister(), i.InputRegister(0));
|
||||
__ AndP(i.InputRegister(0), kSpeculationPoisonRegister);
|
||||
break;
|
||||
case kS390_Peek: {
|
||||
// The incoming value is 0-based, but we need a 1-based value.
|
||||
int reverse_slot = i.InputInt32(0) + 1;
|
||||
int offset =
|
||||
FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
|
||||
if (instr->OutputAt(0)->IsFPRegister()) {
|
||||
LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
|
||||
if (op->representation() == MachineRepresentation::kFloat64) {
|
||||
__ LoadDouble(i.OutputDoubleRegister(), MemOperand(fp, offset));
|
||||
} else {
|
||||
DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
|
||||
__ LoadFloat32(i.OutputFloatRegister(), MemOperand(fp, offset));
|
||||
}
|
||||
} else {
|
||||
__ LoadP(i.OutputRegister(), MemOperand(fp, offset));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kS390_Abs32:
|
||||
// TODO(john.yan): zero-ext
|
||||
__ lpr(i.OutputRegister(0), i.InputRegister(0));
|
||||
|
@ -12,6 +12,7 @@ namespace compiler {
|
||||
// S390-specific opcodes that specify which assembly sequence to emit.
|
||||
// Most opcodes specify a single instruction.
|
||||
#define TARGET_ARCH_OPCODE_LIST(V) \
|
||||
V(S390_Peek) \
|
||||
V(S390_Abs32) \
|
||||
V(S390_Abs64) \
|
||||
V(S390_And32) \
|
||||
|
@ -152,6 +152,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kS390_LoadReverse16:
|
||||
case kS390_LoadReverse32:
|
||||
case kS390_LoadReverse64:
|
||||
case kS390_Peek:
|
||||
return kIsLoadOperation;
|
||||
|
||||
case kS390_StoreWord8:
|
||||
|
@ -2632,7 +2632,24 @@ void InstructionSelector::VisitF32x4ReplaceLane(Node* node) { UNIMPLEMENTED(); }
|
||||
// Emits one kS390_Peek instruction for every call result that is returned in
// a caller frame slot and has a node attached.
//
// results:         the call's outputs; entries whose location is not a caller
//                  frame slot are ignored here.
// call_descriptor: only consulted by the DCHECK below, which asserts that the
//                  call is not a C function call.
// node:            not referenced in this body.
void InstructionSelector::EmitPrepareResults(
|
||||
ZoneVector<PushParameter>* results, const CallDescriptor* call_descriptor,
|
||||
Node* node) {
|
||||
// Results located in caller frame slots are read back with kS390_Peek.
|
||||
S390OperandGenerator g(this);
|
||||
|
||||
// 0-based slot index handed to Peek as an immediate operand. It advances by
// the size (in pointers) of every caller-frame-slot output visited.
int reverse_slot = 0;
|
||||
for (PushParameter output : *results) {
|
||||
if (!output.location.IsCallerFrameSlot()) continue;
|
||||
// Skip any alignment holes in nodes.
// An output without a node gets no Peek, but its slots are still counted
// when reverse_slot is advanced at the bottom of the loop.
|
||||
if (output.node != nullptr) {
|
||||
DCHECK(!call_descriptor->IsCFunctionCall());
|
||||
// Record the floating-point representation of the node before defining
// it as the Peek output; other location types are left unmarked.
if (output.location.GetType() == MachineType::Float32()) {
|
||||
MarkAsFloat32(output.node);
|
||||
} else if (output.location.GetType() == MachineType::Float64()) {
|
||||
MarkAsFloat64(output.node);
|
||||
}
|
||||
Emit(kS390_Peek, g.DefineAsRegister(output.node),
|
||||
g.UseImmediate(reverse_slot));
|
||||
}
|
||||
reverse_slot += output.location.GetSizeInPointers();
|
||||
}
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitF32x4Add(Node* node) { UNIMPLEMENTED(); }
|
||||
|
@ -2756,6 +2756,19 @@ void TurboAssembler::LoadDouble(DoubleRegister dst, const MemOperand& mem,
|
||||
}
|
||||
}
|
||||
|
||||
// Loads a value from `mem` into `dst`, choosing between lfs and lfsx based on
// whether the memory operand's offset passes is_int16().
//
// dst:     destination register.
// mem:     memory operand; its base register (ra) and offset are read.
// scratch: receives the offset when is_int16(offset) is false; untouched
//          otherwise.
// NOTE(review): presumably callers must supply a real scratch register
// whenever the offset may fail is_int16() -- confirm against the declared
// default for this parameter.
void TurboAssembler::LoadFloat32(DoubleRegister dst, const MemOperand& mem,
|
||||
Register scratch) {
|
||||
Register base = mem.ra();
|
||||
int offset = mem.offset();
|
||||
|
||||
// Offset rejected by is_int16(): move it into scratch and address the load
// as base + scratch via lfsx.
if (!is_int16(offset)) {
|
||||
mov(scratch, Operand(offset));
|
||||
lfsx(dst, MemOperand(base, scratch));
|
||||
} else {
|
||||
// Offset accepted by is_int16(): pass the memory operand to lfs unchanged.
lfs(dst, mem);
|
||||
}
|
||||
}
|
||||
|
||||
void MacroAssembler::LoadDoubleU(DoubleRegister dst, const MemOperand& mem,
|
||||
Register scratch) {
|
||||
Register base = mem.ra();
|
||||
|
@ -153,6 +153,8 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
|
||||
void LoadDouble(DoubleRegister dst, const MemOperand& mem,
|
||||
Register scratch = no_reg);
|
||||
void LoadFloat32(DoubleRegister dst, const MemOperand& mem,
|
||||
Register scratch = no_reg);
|
||||
void LoadDoubleLiteral(DoubleRegister result, Double value, Register scratch);
|
||||
|
||||
// load a literal signed int value <value> to GPR <dst>
|
||||
|
@ -180,11 +180,6 @@
|
||||
'test-api/Uint16Array': [PASS, SLOW],
|
||||
}], # variant == nooptimization and (arch == arm or arch == arm64) and simulator_run
|
||||
|
||||
##############################################################################
|
||||
# TODO(ahaas): Port multiple return values to ARM, MIPS, S390 and PPC
|
||||
['arch == s390 or arch == s390x or arch == ppc or arch == ppc64', {
|
||||
'test-multiple-return/*': [SKIP],
|
||||
}],
|
||||
##############################################################################
|
||||
['asan == True', {
|
||||
# Skip tests not suitable for ASAN.
|
||||
|
Loading…
Reference in New Issue
Block a user