A64: Tidy up Push and Pop TODOs.

This addresses several TODOs:
  - Push and Pop requests can be queued up so that arrays of Registers
    can be pushed efficiently, with just one PrepareForPush/Pop.
  - PushMultipleTimes now takes an Operand. This allows variable-length
    arguments arrays to be initialized, for example.
  - A NoUseRealAbortsScope has been added to Abort so that
    AssertStackConsistency can be called from PrepareForPush without
    introducing infinite recursion.

BUG=
R=rmcilroy@chromium.org, ulan@chromium.org

Review URL: https://codereview.chromium.org/170623002

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@19474 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
jacob.bramley@arm.com 2014-02-19 09:43:45 +00:00
parent 78b2a18d02
commit 6d9fcf1198
9 changed files with 374 additions and 69 deletions

View File

@ -523,11 +523,14 @@ void HydrogenCodeStub::GenerateLightweightMiss(MacroAssembler* masm) {
FrameScope scope(masm, StackFrame::INTERNAL);
ASSERT((descriptor->register_param_count_ == 0) ||
x0.Is(descriptor->register_params_[param_count - 1]));
// Push arguments
// TODO(jbramley): Try to push these in blocks.
MacroAssembler::PushPopQueue queue(masm);
for (int i = 0; i < param_count; ++i) {
__ Push(descriptor->register_params_[i]);
queue.Queue(descriptor->register_params_[i]);
}
queue.PushQueued();
ExternalReference miss = descriptor->miss_handler();
__ CallExternalReference(miss, descriptor->register_param_count_);
}

View File

@ -183,7 +183,7 @@ void FullCodeGenerator::Generate() {
if (locals_count > 0) {
__ LoadRoot(x10, Heap::kUndefinedValueRootIndex);
__ PushMultipleTimes(locals_count, x10);
__ PushMultipleTimes(x10, locals_count);
}
}
@ -4622,20 +4622,11 @@ void FullCodeGenerator::EmitGeneratorResume(Expression *generator,
__ Ldr(w10, FieldMemOperand(x10,
SharedFunctionInfo::kFormalParameterCountOffset));
__ LoadRoot(the_hole, Heap::kTheHoleValueRootIndex);
// TODO(jbramley): Write a variant of PushMultipleTimes which takes a register
// instead of a constant count, and use it to replace this loop.
Label push_argument_holes, push_frame;
__ Bind(&push_argument_holes);
__ Subs(w10, w10, 1);
__ B(mi, &push_frame);
__ Push(the_hole);
__ B(&push_argument_holes);
__ PushMultipleTimes(the_hole, w10);
// Enter a new JavaScript frame, and initialize its slots as they were when
// the generator was suspended.
Label resume_frame;
__ Bind(&push_frame);
__ Bl(&resume_frame);
__ B(&done);
@ -4672,16 +4663,8 @@ void FullCodeGenerator::EmitGeneratorResume(Expression *generator,
// Otherwise, we push holes for the operand stack and call the runtime to fix
// up the stack and the handlers.
// TODO(jbramley): Write a variant of PushMultipleTimes which takes a register
// instead of a constant count, and use it to replace this loop.
Label push_operand_holes, call_resume;
__ Bind(&push_operand_holes);
__ Subs(operand_stack_size, operand_stack_size, 1);
__ B(mi, &call_resume);
__ Push(the_hole);
__ B(&push_operand_holes);
__ PushMultipleTimes(the_hole, operand_stack_size);
__ Bind(&call_resume);
__ Mov(x10, Operand(Smi::FromInt(resume_mode)));
__ Push(generator_object, result_register(), x10);
__ CallRuntime(Runtime::kResumeJSGeneratorObject, 3);

View File

@ -1251,8 +1251,9 @@ void MacroAssembler::BumpSystemStackPointer(const Operand& space) {
ASSERT(!csp.Is(sp_));
// TODO(jbramley): Several callers rely on this not using scratch registers,
// so we use the assembler directly here. However, this means that large
// immediate values of 'space' cannot be handled. Once we merge with V8, we
// should try to use the new scope that controls scratch register usage.
// immediate values of 'space' cannot be handled cleanly. Once we implement
// our flexible scratch register idea, we could greatly simplify this
// function.
InstructionAccurateScope scope(this);
if ((space.IsImmediate()) && !is_uint12(space.immediate())) {
// The subtract instruction supports a 12-bit immediate, shifted left by

View File

@ -822,6 +822,56 @@ void MacroAssembler::Pop(const CPURegister& dst0, const CPURegister& dst1,
}
void MacroAssembler::PushPopQueue::PushQueued() {
if (queued_.empty()) return;
masm_->PrepareForPush(size_);
int count = queued_.size();
int index = 0;
while (index < count) {
// PushHelper can only handle registers with the same size and type, and it
// can handle only four at a time. Batch them up accordingly.
CPURegister batch[4] = {NoReg, NoReg, NoReg, NoReg};
int batch_index = 0;
do {
batch[batch_index++] = queued_[index++];
} while ((batch_index < 4) && (index < count) &&
batch[0].IsSameSizeAndType(queued_[index]));
masm_->PushHelper(batch_index, batch[0].SizeInBytes(),
batch[0], batch[1], batch[2], batch[3]);
}
queued_.clear();
}
void MacroAssembler::PushPopQueue::PopQueued() {
if (queued_.empty()) return;
masm_->PrepareForPop(size_);
int count = queued_.size();
int index = 0;
while (index < count) {
// PopHelper can only handle registers with the same size and type, and it
// can handle only four at a time. Batch them up accordingly.
CPURegister batch[4] = {NoReg, NoReg, NoReg, NoReg};
int batch_index = 0;
do {
batch[batch_index++] = queued_[index++];
} while ((batch_index < 4) && (index < count) &&
batch[0].IsSameSizeAndType(queued_[index]));
masm_->PopHelper(batch_index, batch[0].SizeInBytes(),
batch[0], batch[1], batch[2], batch[3]);
}
queued_.clear();
}
void MacroAssembler::PushCPURegList(CPURegList registers) {
int size = registers.RegisterSizeInBytes();
@ -867,7 +917,7 @@ void MacroAssembler::PopCPURegList(CPURegList registers) {
}
void MacroAssembler::PushMultipleTimes(int count, Register src) {
void MacroAssembler::PushMultipleTimes(CPURegister src, int count) {
int size = src.SizeInBytes();
PrepareForPush(count, size);
@ -902,6 +952,51 @@ void MacroAssembler::PushMultipleTimes(int count, Register src) {
}
void MacroAssembler::PushMultipleTimes(CPURegister src, Register count) {
PrepareForPush(Operand(count, UXTW, WhichPowerOf2(src.SizeInBytes())));
Register temp = AppropriateTempFor(count);
if (FLAG_optimize_for_size) {
Label loop, done;
Subs(temp, count, 1);
B(mi, &done);
// Push all registers individually, to save code size.
Bind(&loop);
Subs(temp, temp, 1);
PushHelper(1, src.SizeInBytes(), src, NoReg, NoReg, NoReg);
B(pl, &loop);
Bind(&done);
} else {
Label loop, leftover2, leftover1, done;
Subs(temp, count, 4);
B(mi, &leftover2);
// Push groups of four first.
Bind(&loop);
Subs(temp, temp, 4);
PushHelper(4, src.SizeInBytes(), src, src, src, src);
B(pl, &loop);
// Push groups of two.
Bind(&leftover2);
Tbz(count, 1, &leftover1);
PushHelper(2, src.SizeInBytes(), src, src, NoReg, NoReg);
// Push the last one (if required).
Bind(&leftover1);
Tbz(count, 0, &done);
PushHelper(1, src.SizeInBytes(), src, NoReg, NoReg, NoReg);
Bind(&done);
}
}
void MacroAssembler::PushHelper(int count, int size,
const CPURegister& src0,
const CPURegister& src1,
@ -983,30 +1078,39 @@ void MacroAssembler::PopHelper(int count, int size,
}
void MacroAssembler::PrepareForPush(int count, int size) {
// TODO(jbramley): Use AssertStackConsistency here, if possible. See the
// AssertStackConsistency for details of why we can't at the moment.
if (csp.Is(StackPointer())) {
// If the current stack pointer is csp, then it must be aligned to 16 bytes
// on entry and the total size of the specified registers must also be a
// multiple of 16 bytes.
ASSERT((count * size) % 16 == 0);
} else {
// Even if the current stack pointer is not the system stack pointer (csp),
// the system stack pointer will still be modified in order to comply with
// ABI rules about accessing memory below the system stack pointer.
BumpSystemStackPointer(count * size);
}
}
void MacroAssembler::PrepareForPop(int count, int size) {
void MacroAssembler::PrepareForPush(Operand total_size) {
AssertStackConsistency();
if (csp.Is(StackPointer())) {
// If the current stack pointer is csp, then it must be aligned to 16 bytes
// on entry and the total size of the specified registers must also be a
// multiple of 16 bytes.
ASSERT((count * size) % 16 == 0);
if (total_size.IsImmediate()) {
ASSERT((total_size.immediate() % 16) == 0);
}
// Don't check access size for non-immediate sizes. It's difficult to do
// well, and it will be caught by hardware (or the simulator) anyway.
} else {
// Even if the current stack pointer is not the system stack pointer (csp),
// the system stack pointer will still be modified in order to comply with
// ABI rules about accessing memory below the system stack pointer.
BumpSystemStackPointer(total_size);
}
}
void MacroAssembler::PrepareForPop(Operand total_size) {
AssertStackConsistency();
if (csp.Is(StackPointer())) {
// If the current stack pointer is csp, then it must be aligned to 16 bytes
// on entry and the total size of the specified registers must also be a
// multiple of 16 bytes.
if (total_size.IsImmediate()) {
ASSERT((total_size.immediate() % 16) == 0);
}
// Don't check access size for non-immediate sizes. It's difficult to do
// well, and it will be caught by hardware (or the simulator) anyway.
}
}
@ -1102,15 +1206,24 @@ void MacroAssembler::PopCalleeSavedRegisters() {
void MacroAssembler::AssertStackConsistency() {
if (emit_debug_code() && !csp.Is(StackPointer())) {
if (emit_debug_code()) {
if (csp.Is(StackPointer())) {
// TODO(jbramley): Check for csp alignment if it is the stack pointer.
} else {
// TODO(jbramley): Currently we cannot use this assertion in Push because
// some calling code assumes that the flags are preserved. For an example,
// look at Builtins::Generate_ArgumentsAdaptorTrampoline.
Cmp(csp, StackPointer());
Check(ls, kTheCurrentStackPointerIsBelowCsp);
// We can't check the alignment of csp without using a scratch register
// (or clobbering the flags), but the processor (or simulator) will abort
// if it is not properly aligned during a load.
ldr(xzr, MemOperand(csp, 0));
} else if (FLAG_enable_slow_asserts) {
Label ok;
// Check that csp <= StackPointer(), preserving all registers and NZCV.
sub(StackPointer(), csp, StackPointer());
cbz(StackPointer(), &ok); // Ok if csp == StackPointer().
tbnz(StackPointer(), kXSignBit, &ok); // Ok if csp < StackPointer().
Abort(kTheCurrentStackPointerIsBelowCsp);
bind(&ok);
// Restore StackPointer().
sub(StackPointer(), csp, StackPointer());
}
}
}
@ -4515,6 +4628,9 @@ void MacroAssembler::Abort(BailoutReason reason) {
Mov(jssp, old_stack_pointer);
if (use_real_aborts()) {
// Avoid infinite recursion; Push contains some assertions that use Abort.
NoUseRealAbortsScope no_real_aborts(this);
Mov(x0, Operand(Smi::FromInt(reason)));
Push(x0);

View File

@ -28,6 +28,8 @@
#ifndef V8_A64_MACRO_ASSEMBLER_A64_H_
#define V8_A64_MACRO_ASSEMBLER_A64_H_
#include <vector>
#include "v8globals.h"
#include "globals.h"
@ -516,7 +518,8 @@ class MacroAssembler : public Assembler {
}
// Push the specified register 'count' times.
void PushMultipleTimes(int count, Register src);
void PushMultipleTimes(CPURegister src, Register count);
void PushMultipleTimes(CPURegister src, int count);
// This is a convenience method for pushing a single Handle<Object>.
inline void Push(Handle<Object> handle);
@ -530,6 +533,35 @@ class MacroAssembler : public Assembler {
Pop(dst);
}
// Sometimes callers need to push or pop multiple registers in a way that is
// difficult to structure efficiently for fixed Push or Pop calls. This scope
// allows push requests to be queued up, then flushed at once. The
// MacroAssembler will try to generate the most efficient sequence required.
//
// Unlike the other Push and Pop macros, PushPopQueue can handle mixed sets of
// register sizes and types.
class PushPopQueue {
public:
explicit PushPopQueue(MacroAssembler* masm) : masm_(masm), size_(0) { }
~PushPopQueue() {
ASSERT(queued_.empty());
}
void Queue(const CPURegister& rt) {
size_ += rt.SizeInBytes();
queued_.push_back(rt);
}
void PushQueued();
void PopQueued();
private:
MacroAssembler* masm_;
int size_;
std::vector<CPURegister> queued_;
};
// Poke 'src' onto the stack. The offset is in bytes.
//
// If the current stack pointer (according to StackPointer()) is csp, then
@ -1998,9 +2030,12 @@ class MacroAssembler : public Assembler {
// Perform necessary maintenance operations before a push or pop.
//
// Note that size is per register, and is specified in bytes.
void PrepareForPush(int count, int size);
void PrepareForPop(int count, int size);
// Note that size is specified in bytes.
void PrepareForPush(Operand total_size);
void PrepareForPop(Operand total_size);
void PrepareForPush(int count, int size) { PrepareForPush(count * size); }
void PrepareForPop(int count, int size) { PrepareForPop(count * size); }
// Call Printf. On a native build, a simple call will be generated, but if the
// simulator is being used then a suitable pseudo-instruction is used. The

View File

@ -754,14 +754,16 @@ void StubCompiler::GenerateFastApiCall(MacroAssembler* masm,
int argc,
Register* values) {
ASSERT(!AreAliased(receiver, scratch));
__ Push(receiver);
// Write the arguments to stack frame.
MacroAssembler::PushPopQueue queue(masm);
queue.Queue(receiver);
// Write the arguments to the stack frame.
for (int i = 0; i < argc; i++) {
// TODO(jbramley): Push these in as few Push() calls as possible.
Register arg = values[argc-1-i];
ASSERT(!AreAliased(receiver, scratch, arg));
__ Push(arg);
queue.Queue(arg);
}
queue.PushQueued();
ASSERT(optimization.is_simple_api_call());

View File

@ -1332,6 +1332,7 @@ class MaybeObject BASE_EMBEDDED {
"The instruction to patch should be an ori") \
V(kTheSourceAndDestinationAreTheSame, \
"The source and destination are the same") \
V(kTheStackPointerIsNotAligned, "The stack pointer is not aligned.") \
V(kTheStackWasCorruptedByMacroAssemblerCall, \
"The stack was corrupted by MacroAssembler::Call()") \
V(kTooManyParametersLocals, "Too many parameters/locals") \

View File

@ -8439,7 +8439,8 @@ static void PushPopJsspWXOverlapHelper(int reg_count, int claim) {
SETUP_SIZE(BUF_SIZE * 2);
// Work out which registers to use, based on reg_size.
static RegList const allowed = ~(x8.Bit() | x9.Bit() | jssp.Bit());
Register tmp = x8;
static RegList const allowed = ~(tmp.Bit() | jssp.Bit());
if (reg_count == kPushPopJsspMaxRegCount) {
reg_count = CountSetBits(allowed, kNumberOfRegisters);
}
@ -8525,7 +8526,13 @@ static void PushPopJsspWXOverlapHelper(int reg_count, int claim) {
int times = i % 4 + 1;
if (i & 1) {
// Push odd-numbered registers as W registers.
__ PushMultipleTimes(times, w[i]);
if (i & 2) {
__ PushMultipleTimes(w[i], times);
} else {
// Use a register to specify the count.
__ Mov(tmp.W(), times);
__ PushMultipleTimes(w[i], tmp.W());
}
// Fill in the expected stack slots.
for (int j = 0; j < times; j++) {
if (w[i].Is(wzr)) {
@ -8537,7 +8544,13 @@ static void PushPopJsspWXOverlapHelper(int reg_count, int claim) {
}
} else {
// Push even-numbered registers as X registers.
__ PushMultipleTimes(times, x[i]);
if (i & 2) {
__ PushMultipleTimes(x[i], times);
} else {
// Use a register to specify the count.
__ Mov(tmp, times);
__ PushMultipleTimes(x[i], tmp);
}
// Fill in the expected stack slots.
for (int j = 0; j < times; j++) {
if (x[i].IsZero()) {
@ -8728,6 +8741,156 @@ TEST(push_pop_csp) {
}
TEST(push_queued) {
INIT_V8();
SETUP();
START();
ASSERT(__ StackPointer().Is(csp));
__ Mov(jssp, __ StackPointer());
__ SetStackPointer(jssp);
MacroAssembler::PushPopQueue queue(&masm);
// Queue up registers.
queue.Queue(x0);
queue.Queue(x1);
queue.Queue(x2);
queue.Queue(x3);
queue.Queue(w4);
queue.Queue(w5);
queue.Queue(w6);
queue.Queue(d0);
queue.Queue(d1);
queue.Queue(s2);
__ Mov(x0, 0x1234000000000000);
__ Mov(x1, 0x1234000100010001);
__ Mov(x2, 0x1234000200020002);
__ Mov(x3, 0x1234000300030003);
__ Mov(w4, 0x12340004);
__ Mov(w5, 0x12340005);
__ Mov(w6, 0x12340006);
__ Fmov(d0, 123400.0);
__ Fmov(d1, 123401.0);
__ Fmov(s2, 123402.0);
// Actually push them.
queue.PushQueued();
Clobber(&masm, CPURegList(CPURegister::kRegister, kXRegSize, 0, 6));
Clobber(&masm, CPURegList(CPURegister::kFPRegister, kDRegSize, 0, 2));
// Pop them conventionally.
__ Pop(s2);
__ Pop(d1, d0);
__ Pop(w6, w5, w4);
__ Pop(x3, x2, x1, x0);
__ Mov(csp, __ StackPointer());
__ SetStackPointer(csp);
END();
RUN();
ASSERT_EQUAL_64(0x1234000000000000, x0);
ASSERT_EQUAL_64(0x1234000100010001, x1);
ASSERT_EQUAL_64(0x1234000200020002, x2);
ASSERT_EQUAL_64(0x1234000300030003, x3);
ASSERT_EQUAL_32(0x12340004, w4);
ASSERT_EQUAL_32(0x12340005, w5);
ASSERT_EQUAL_32(0x12340006, w6);
ASSERT_EQUAL_FP64(123400.0, d0);
ASSERT_EQUAL_FP64(123401.0, d1);
ASSERT_EQUAL_FP32(123402.0, s2);
TEARDOWN();
}
TEST(pop_queued) {
INIT_V8();
SETUP();
START();
ASSERT(__ StackPointer().Is(csp));
__ Mov(jssp, __ StackPointer());
__ SetStackPointer(jssp);
MacroAssembler::PushPopQueue queue(&masm);
__ Mov(x0, 0x1234000000000000);
__ Mov(x1, 0x1234000100010001);
__ Mov(x2, 0x1234000200020002);
__ Mov(x3, 0x1234000300030003);
__ Mov(w4, 0x12340004);
__ Mov(w5, 0x12340005);
__ Mov(w6, 0x12340006);
__ Fmov(d0, 123400.0);
__ Fmov(d1, 123401.0);
__ Fmov(s2, 123402.0);
// Push registers conventionally.
__ Push(x0, x1, x2, x3);
__ Push(w4, w5, w6);
__ Push(d0, d1);
__ Push(s2);
// Queue up a pop.
queue.Queue(s2);
queue.Queue(d1);
queue.Queue(d0);
queue.Queue(w6);
queue.Queue(w5);
queue.Queue(w4);
queue.Queue(x3);
queue.Queue(x2);
queue.Queue(x1);
queue.Queue(x0);
Clobber(&masm, CPURegList(CPURegister::kRegister, kXRegSize, 0, 6));
Clobber(&masm, CPURegList(CPURegister::kFPRegister, kDRegSize, 0, 2));
// Actually pop them.
queue.PopQueued();
__ Mov(csp, __ StackPointer());
__ SetStackPointer(csp);
END();
RUN();
ASSERT_EQUAL_64(0x1234000000000000, x0);
ASSERT_EQUAL_64(0x1234000100010001, x1);
ASSERT_EQUAL_64(0x1234000200020002, x2);
ASSERT_EQUAL_64(0x1234000300030003, x3);
ASSERT_EQUAL_64(0x0000000012340004, x4);
ASSERT_EQUAL_64(0x0000000012340005, x5);
ASSERT_EQUAL_64(0x0000000012340006, x6);
ASSERT_EQUAL_FP64(123400.0, d0);
ASSERT_EQUAL_FP64(123401.0, d1);
ASSERT_EQUAL_FP32(123402.0, s2);
TEARDOWN();
}
TEST(jump_both_smi) {
INIT_V8();
SETUP();

View File

@ -65,9 +65,9 @@ ConvertDToIFunc MakeConvertDToIFuncTrampoline(Isolate* isolate,
// Push the double argument.
__ Push(d0);
if (!source_reg.is(jssp)) {
__ Mov(source_reg, jssp);
}
__ Mov(source_reg, jssp);
MacroAssembler::PushPopQueue queue(&masm);
// Save registers make sure they don't get clobbered.
int source_reg_offset = kDoubleSize;
@ -75,13 +75,14 @@ ConvertDToIFunc MakeConvertDToIFuncTrampoline(Isolate* isolate,
for (;reg_num < Register::NumAllocatableRegisters(); ++reg_num) {
Register reg = Register::from_code(reg_num);
if (!reg.is(destination_reg)) {
__ Push(reg);
queue.Queue(reg);
source_reg_offset += kPointerSize;
}
}
// Re-push the double argument.
__ Push(d0);
queue.Queue(d0);
queue.PushQueued();
// Call through to the actual stub
if (inline_fastpath) {