PPC/s390: [arm][turbofan] Implement on-stack returns.

Port 9c7b6e1e8a

Original Commit Message:

    This is the implementation of crrev.com/c/766371 for arm.

    Original description:

    Add the ability to return (multiple) return values on the stack:

    - Extend stack frames with a new buffer region for return slots.
      This region is located at the end of a caller's frame such that
      its slots can be indexed as caller frame slots in a callee
      (located beyond its parameters) and assigned return values.
    - Adjust stack frame construction and deconstruction accordingly.
    - Extend linkage computation to support register plus stack returns.
    - Reserve return slots in caller frame when respective calls occur.
    - Introduce and generate architecture instructions ('peek') for
      reading back results from return slots in the caller.
    - Aggressive tests.
    - Some minor clean-up.

R=ahaas@chromium.org, joransiu@ca.ibm.com, michael_dawson@ca.ibm.com, miladfar@ca.ibm.com
BUG=
LOG=N

Change-Id: I83df1af8c49f6d6c5b529db599fce61a1da2490d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1496549
Reviewed-by: Andreas Haas <ahaas@chromium.org>
Commit-Queue: Junliang Yan <jyan@ca.ibm.com>
Cr-Commit-Position: refs/heads/master@{#60032}
This commit is contained in:
Junliang Yan 2019-03-04 09:23:27 -05:00 committed by Commit Bot
parent 15925e5cc6
commit 7103c19432
11 changed files with 95 additions and 13 deletions

View File

@ -1155,6 +1155,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ and_(i.OutputRegister(), i.InputRegister(0),
kSpeculationPoisonRegister);
break;
case kPPC_Peek: {
// Reads a value back from a frame slot (addressed relative to fp) into the
// instruction's output register. Input 0 is an immediate slot index that is
// counted backwards from the total frame slot count.
// The incoming value is 0-based, but we need a 1-based value.
int reverse_slot = i.InputInt32(0) + 1;
// Translate "reverse_slot slots before the end of the frame" into an
// fp-relative byte offset.
int offset =
FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
// Choose the load based on the kind of output operand: FP register outputs
// are loaded according to their recorded representation, everything else
// goes through LoadP. r0 is handed to the load helpers as the scratch
// register.
if (instr->OutputAt(0)->IsFPRegister()) {
LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
if (op->representation() == MachineRepresentation::kFloat64) {
__ LoadDouble(i.OutputDoubleRegister(), MemOperand(fp, offset), r0);
} else {
// Only kFloat64 and kFloat32 are expected for FP register outputs.
DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
__ LoadFloat32(i.OutputFloatRegister(), MemOperand(fp, offset), r0);
}
} else {
__ LoadP(i.OutputRegister(), MemOperand(fp, offset), r0);
}
break;
}
case kPPC_And:
if (HasRegisterInput(instr, 1)) {
__ and_(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),

View File

@ -12,6 +12,7 @@ namespace compiler {
// PPC-specific opcodes that specify which assembly sequence to emit.
// Most opcodes specify a single instruction.
#define TARGET_ARCH_OPCODE_LIST(V) \
V(PPC_Peek) \
V(PPC_And) \
V(PPC_AndComplement) \
V(PPC_Or) \

View File

@ -121,6 +121,11 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kPPC_LoadWord64:
case kPPC_LoadFloat32:
case kPPC_LoadDouble:
case kPPC_AtomicLoadUint8:
case kPPC_AtomicLoadUint16:
case kPPC_AtomicLoadWord32:
case kPPC_AtomicLoadWord64:
case kPPC_Peek:
return kIsLoadOperation;
case kPPC_StoreWord8:
@ -134,12 +139,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kPPC_StoreToStackSlot:
return kHasSideEffect;
case kPPC_AtomicLoadUint8:
case kPPC_AtomicLoadUint16:
case kPPC_AtomicLoadWord32:
case kPPC_AtomicLoadWord64:
return kIsLoadOperation;
case kPPC_AtomicStoreUint8:
case kPPC_AtomicStoreUint16:
case kPPC_AtomicStoreWord32:

View File

@ -2232,7 +2232,24 @@ void InstructionSelector::VisitF32x4ReplaceLane(Node* node) { UNIMPLEMENTED(); }
// Emits one kPPC_Peek instruction for every call result that lives in a
// caller frame slot, so the value is read back into a register.
//
// |results| is walked in order. Entries that are not caller frame slots are
// ignored entirely. Entries that are caller frame slots but have no node
// (alignment holes) emit nothing, yet still advance the running slot index by
// their size in pointers. |call_descriptor| is only consulted by a DCHECK;
// |node| is not used here.
void InstructionSelector::EmitPrepareResults(
    ZoneVector<PushParameter>* results, const CallDescriptor* call_descriptor,
    Node* node) {
  PPCOperandGenerator g(this);

  // 0-based index of the current result slot; passed to kPPC_Peek as an
  // immediate.
  int slot_index = 0;
  for (PushParameter result : *results) {
    if (result.location.IsCallerFrameSlot()) {
      // A null node marks an alignment hole: nothing to read back.
      if (result.node != nullptr) {
        DCHECK(!call_descriptor->IsCFunctionCall());
        // Record the floating-point representation of the result node so the
        // output is defined in a matching register.
        if (result.location.GetType() == MachineType::Float32()) {
          MarkAsFloat32(result.node);
        } else if (result.location.GetType() == MachineType::Float64()) {
          MarkAsFloat64(result.node);
        }
        Emit(kPPC_Peek, g.DefineAsRegister(result.node),
             g.UseImmediate(slot_index));
      }
      // Holes and real results both occupy space in the slot numbering.
      slot_index += result.location.GetSizeInPointers();
    }
  }
}
void InstructionSelector::VisitF32x4Add(Node* node) { UNIMPLEMENTED(); }

View File

@ -1637,6 +1637,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
DCHECK_EQ(i.OutputRegister(), i.InputRegister(0));
__ AndP(i.InputRegister(0), kSpeculationPoisonRegister);
break;
case kS390_Peek: {
// Reads a value back from a frame slot (addressed relative to fp) into the
// instruction's output register. Input 0 is an immediate slot index that is
// counted backwards from the total frame slot count.
// The incoming value is 0-based, but we need a 1-based value.
int reverse_slot = i.InputInt32(0) + 1;
// Translate "reverse_slot slots before the end of the frame" into an
// fp-relative byte offset.
int offset =
FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
// Choose the load based on the kind of output operand: FP register outputs
// are loaded according to their recorded representation, everything else
// goes through LoadP.
if (instr->OutputAt(0)->IsFPRegister()) {
LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
if (op->representation() == MachineRepresentation::kFloat64) {
__ LoadDouble(i.OutputDoubleRegister(), MemOperand(fp, offset));
} else {
// Only kFloat64 and kFloat32 are expected for FP register outputs.
DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
__ LoadFloat32(i.OutputFloatRegister(), MemOperand(fp, offset));
}
} else {
__ LoadP(i.OutputRegister(), MemOperand(fp, offset));
}
break;
}
case kS390_Abs32:
// TODO(john.yan): zero-ext
__ lpr(i.OutputRegister(0), i.InputRegister(0));

View File

@ -12,6 +12,7 @@ namespace compiler {
// S390-specific opcodes that specify which assembly sequence to emit.
// Most opcodes specify a single instruction.
#define TARGET_ARCH_OPCODE_LIST(V) \
V(S390_Peek) \
V(S390_Abs32) \
V(S390_Abs64) \
V(S390_And32) \

View File

@ -152,6 +152,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kS390_LoadReverse16:
case kS390_LoadReverse32:
case kS390_LoadReverse64:
case kS390_Peek:
return kIsLoadOperation;
case kS390_StoreWord8:

View File

@ -2632,7 +2632,24 @@ void InstructionSelector::VisitF32x4ReplaceLane(Node* node) { UNIMPLEMENTED(); }
// Emits one kS390_Peek instruction for every call result that lives in a
// caller frame slot, so the value is read back into a register.
//
// |results| is walked in order. Entries that are not caller frame slots are
// ignored entirely. Entries that are caller frame slots but have no node
// (alignment holes) emit nothing, yet still advance the running slot index by
// their size in pointers. |call_descriptor| is only consulted by a DCHECK;
// |node| is not used here.
void InstructionSelector::EmitPrepareResults(
    ZoneVector<PushParameter>* results, const CallDescriptor* call_descriptor,
    Node* node) {
  S390OperandGenerator g(this);

  // 0-based index of the current result slot; passed to kS390_Peek as an
  // immediate.
  int slot_index = 0;
  for (PushParameter result : *results) {
    if (result.location.IsCallerFrameSlot()) {
      // A null node marks an alignment hole: nothing to read back.
      if (result.node != nullptr) {
        DCHECK(!call_descriptor->IsCFunctionCall());
        // Record the floating-point representation of the result node so the
        // output is defined in a matching register.
        if (result.location.GetType() == MachineType::Float32()) {
          MarkAsFloat32(result.node);
        } else if (result.location.GetType() == MachineType::Float64()) {
          MarkAsFloat64(result.node);
        }
        Emit(kS390_Peek, g.DefineAsRegister(result.node),
             g.UseImmediate(slot_index));
      }
      // Holes and real results both occupy space in the slot numbering.
      slot_index += result.location.GetSizeInPointers();
    }
  }
}
void InstructionSelector::VisitF32x4Add(Node* node) { UNIMPLEMENTED(); }

View File

@ -2756,6 +2756,19 @@ void TurboAssembler::LoadDouble(DoubleRegister dst, const MemOperand& mem,
}
}
// Loads a float32 from |mem| into |dst|.
//
// When the operand's offset passes is_int16() the displacement form (lfs) is
// emitted directly. Otherwise the offset is first moved into |scratch| and
// the indexed form (lfsx) is used with the operand's base register.
void TurboAssembler::LoadFloat32(DoubleRegister dst, const MemOperand& mem,
                                 Register scratch) {
  int displacement = mem.offset();

  // Common case: the offset can be encoded in the instruction itself.
  if (is_int16(displacement)) {
    lfs(dst, mem);
    return;
  }

  // Offset too large for the immediate field: go through |scratch|.
  Register base_reg = mem.ra();
  mov(scratch, Operand(displacement));
  lfsx(dst, MemOperand(base_reg, scratch));
}
void MacroAssembler::LoadDoubleU(DoubleRegister dst, const MemOperand& mem,
Register scratch) {
Register base = mem.ra();

View File

@ -153,6 +153,8 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void LoadDouble(DoubleRegister dst, const MemOperand& mem,
Register scratch = no_reg);
// Loads a float32 from |mem| into |dst|. |scratch| is only written when the
// offset of |mem| does not pass is_int16(), in which case the offset is
// materialized in |scratch| and an indexed load is emitted.
// NOTE(review): with the default scratch (no_reg) a large offset would move
// into no_reg; callers that may see large offsets presumably must pass a
// real scratch register. Confirm against callers.
void LoadFloat32(DoubleRegister dst, const MemOperand& mem,
Register scratch = no_reg);
void LoadDoubleLiteral(DoubleRegister result, Double value, Register scratch);
// load a literal signed int value <value> to GPR <dst>

View File

@ -180,11 +180,6 @@
'test-api/Uint16Array': [PASS, SLOW],
}], # variant == nooptimization and (arch == arm or arch == arm64) and simulator_run
##############################################################################
# TODO(ahaas): Port multiple return values to ARM, MIPS, S390 and PPC
['arch == s390 or arch == s390x or arch == ppc or arch == ppc64', {
'test-multiple-return/*': [SKIP],
}],
##############################################################################
['asan == True', {
# Skip tests not suitable for ASAN.