Add support for deoptimization on x64.

I did not take out the code relating to osr from the generate method
since this makes it easier to compare to ia32 (we will abort anyway when we hit the osr code so there should be no issues with having this in)
 

Review URL: http://codereview.chromium.org/6390001

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@6449 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
ricow@chromium.org 2011-01-25 07:57:56 +00:00
parent 161d631b5e
commit d5cab38a33
9 changed files with 501 additions and 19 deletions

View File

@ -370,7 +370,7 @@ void Deoptimizer::EntryGenerator::Generate() {
// Copy core registers into FrameDescription::registers_[kNumRegisters].
ASSERT(Register::kNumRegisters == kNumberOfRegisters);
for (int i = 0; i < kNumberOfRegisters; i++) {
int offset = (i * kIntSize) + FrameDescription::registers_offset();
int offset = (i * kPointerSize) + FrameDescription::registers_offset();
__ ldr(r2, MemOperand(sp, i * kPointerSize));
__ str(r2, MemOperand(r1, offset));
}
@ -459,7 +459,7 @@ void Deoptimizer::EntryGenerator::Generate() {
// Push the registers from the last output frame.
for (int i = kNumberOfRegisters - 1; i >= 0; i--) {
int offset = (i * kIntSize) + FrameDescription::registers_offset();
int offset = (i * kPointerSize) + FrameDescription::registers_offset();
__ ldr(r6, MemOperand(r2, offset));
__ push(r6);
}

View File

@ -535,7 +535,7 @@ void Deoptimizer::EntryGenerator::Generate() {
// Fill in the input registers.
for (int i = 0; i < kNumberOfRegisters; i++) {
int offset = (i * kIntSize) + FrameDescription::registers_offset();
int offset = (i * kPointerSize) + FrameDescription::registers_offset();
__ mov(ecx, Operand(esp, (kNumberOfRegisters - 1 - i) * kPointerSize));
__ mov(Operand(ebx, offset), ecx);
}
@ -618,7 +618,7 @@ void Deoptimizer::EntryGenerator::Generate() {
// Push the registers from the last output frame.
for (int i = 0; i < kNumberOfRegisters; i++) {
int offset = (i * kIntSize) + FrameDescription::registers_offset();
int offset = (i * kPointerSize) + FrameDescription::registers_offset();
__ push(Operand(ebx, offset));
}

View File

@ -1278,6 +1278,18 @@ void Assembler::j(Condition cc, NearLabel* L, Hint hint) {
}
void Assembler::j(Condition cc, byte* entry, RelocInfo::Mode rmode) {
EnsureSpace ensure_space(this);
RecordRelocInfo(rmode);
last_pc_ = pc_;
ASSERT((0 <= cc) && (cc < 16));
// 0000 1111 1000 tttn #32-bit disp.
emit(0x0F);
emit(0x80 | cc);
emit(entry - (pc_ + sizeof(intptr_t)));
}
void Assembler::jmp(Label* L) {
EnsureSpace ensure_space(this);
last_pc_ = pc_;
@ -1949,6 +1961,14 @@ void Assembler::push(Immediate value) {
}
void Assembler::push_imm32(int32_t imm32) {
EnsureSpace ensure_space(this);
last_pc_ = pc_;
emit(0x68);
emitl(imm32);
}
void Assembler::pushfq() {
EnsureSpace ensure_space(this);
last_pc_ = pc_;

View File

@ -590,6 +590,9 @@ class Assembler : public Malloced {
void popfq();
void push(Immediate value);
// Push a 32 bit integer, and guarantee that it is actually pushed as a
// 32 bit value, the normal push will optimize the 8 bit case.
void push_imm32(int32_t imm32);
void push(Register src);
void push(const Operand& src);
@ -1122,6 +1125,7 @@ class Assembler : public Malloced {
// Conditional jumps
void j(Condition cc, Label* L);
void j(Condition cc, byte* entry, RelocInfo::Mode rmode);
void j(Condition cc, Handle<Code> target, RelocInfo::Mode rmode);
// Conditional short jump

View File

@ -561,7 +561,33 @@ void Builtins::Generate_LazyRecompile(MacroAssembler* masm) {
static void Generate_NotifyDeoptimizedHelper(MacroAssembler* masm,
Deoptimizer::BailoutType type) {
__ int3();
// Enter an internal frame.
__ EnterInternalFrame();
// Pass the deoptimization type to the runtime system.
__ Push(Smi::FromInt(static_cast<int>(type)));
__ CallRuntime(Runtime::kNotifyDeoptimized, 1);
// Tear down temporary frame.
__ LeaveInternalFrame();
// Get the full codegen state from the stack and untag it.
__ SmiToInteger32(rcx, Operand(rsp, 1 * kPointerSize));
// Switch on the state.
NearLabel not_no_registers, not_tos_rax;
__ cmpq(rcx, Immediate(FullCodeGenerator::NO_REGISTERS));
__ j(not_equal, &not_no_registers);
__ ret(1 * kPointerSize); // Remove state.
__ bind(&not_no_registers);
__ movq(rax, Operand(rsp, 2 * kPointerSize));
__ cmpq(rcx, Immediate(FullCodeGenerator::TOS_REG));
__ j(not_equal, &not_tos_rax);
__ ret(2 * kPointerSize); // Remove state, rax.
__ bind(&not_tos_rax);
__ Abort("no cases left");
}
void Builtins::Generate_NotifyDeoptimized(MacroAssembler* masm) {

View File

@ -41,8 +41,68 @@ namespace internal {
int Deoptimizer::table_entry_size_ = 10;
void Deoptimizer::DeoptimizeFunction(JSFunction* function) {
// UNIMPLEMENTED, for now just return.
return;
AssertNoAllocation no_allocation;
if (!function->IsOptimized()) return;
// Get the optimized code.
Code* code = function->code();
// Invalidate the relocation information, as it will become invalid by the
// code patching below, and is not needed any more.
code->InvalidateRelocation();
// For each return after a safepoint insert a absolute call to the
// corresponding deoptimization entry.
unsigned last_pc_offset = 0;
SafepointTable table(function->code());
for (unsigned i = 0; i < table.length(); i++) {
unsigned pc_offset = table.GetPcOffset(i);
SafepointEntry safepoint_entry = table.GetEntry(i);
int deoptimization_index = safepoint_entry.deoptimization_index();
int gap_code_size = safepoint_entry.gap_code_size();
#ifdef DEBUG
// Destroy the code which is not supposed to run again.
unsigned instructions = pc_offset - last_pc_offset;
CodePatcher destroyer(code->instruction_start() + last_pc_offset,
instructions);
for (unsigned i = 0; i < instructions; i++) {
destroyer.masm()->int3();
}
#endif
last_pc_offset = pc_offset;
if (deoptimization_index != Safepoint::kNoDeoptimizationIndex) {
CodePatcher patcher(
code->instruction_start() + pc_offset + gap_code_size,
Assembler::kCallInstructionLength);
patcher.masm()->Call(GetDeoptimizationEntry(deoptimization_index, LAZY),
RelocInfo::NONE);
last_pc_offset += gap_code_size + Assembler::kCallInstructionLength;
}
}
#ifdef DEBUG
// Destroy the code which is not supposed to run again.
unsigned instructions = code->safepoint_table_start() - last_pc_offset;
CodePatcher destroyer(code->instruction_start() + last_pc_offset,
instructions);
for (unsigned i = 0; i < instructions; i++) {
destroyer.masm()->int3();
}
#endif
// Add the deoptimizing code to the list.
DeoptimizingCodeListNode* node = new DeoptimizingCodeListNode(code);
node->set_next(deoptimizing_code_list_);
deoptimizing_code_list_ = node;
// Set the code for the function to non-optimized version.
function->ReplaceCode(function->shared()->code());
if (FLAG_trace_deopt) {
PrintF("[forced deoptimization: ");
function->PrintName();
PrintF(" / %" V8PRIxPTR "]\n", reinterpret_cast<intptr_t>(function));
}
}
@ -67,20 +127,384 @@ void Deoptimizer::DoComputeOsrOutputFrame() {
void Deoptimizer::DoComputeFrame(TranslationIterator* iterator,
int frame_index) {
UNIMPLEMENTED();
// Read the ast node id, function, and frame height for this output frame.
Translation::Opcode opcode =
static_cast<Translation::Opcode>(iterator->Next());
USE(opcode);
ASSERT(Translation::FRAME == opcode);
int node_id = iterator->Next();
JSFunction* function = JSFunction::cast(ComputeLiteral(iterator->Next()));
unsigned height = iterator->Next();
unsigned height_in_bytes = height * kPointerSize;
if (FLAG_trace_deopt) {
PrintF(" translating ");
function->PrintName();
PrintF(" => node=%d, height=%d\n", node_id, height_in_bytes);
}
// The 'fixed' part of the frame consists of the incoming parameters and
// the part described by JavaScriptFrameConstants.
unsigned fixed_frame_size = ComputeFixedSize(function);
unsigned input_frame_size = input_->GetFrameSize();
unsigned output_frame_size = height_in_bytes + fixed_frame_size;
// Allocate and store the output frame description.
FrameDescription* output_frame =
new(output_frame_size) FrameDescription(output_frame_size, function);
bool is_bottommost = (0 == frame_index);
bool is_topmost = (output_count_ - 1 == frame_index);
ASSERT(frame_index >= 0 && frame_index < output_count_);
ASSERT(output_[frame_index] == NULL);
output_[frame_index] = output_frame;
// The top address for the bottommost output frame can be computed from
// the input frame pointer and the output frame's height. For all
// subsequent output frames, it can be computed from the previous one's
// top address and the current frame's size.
intptr_t top_address;
if (is_bottommost) {
// 2 = context and function in the frame.
top_address =
input_->GetRegister(rbp.code()) - (2 * kPointerSize) - height_in_bytes;
} else {
top_address = output_[frame_index - 1]->GetTop() - output_frame_size;
}
output_frame->SetTop(top_address);
// Compute the incoming parameter translation.
int parameter_count = function->shared()->formal_parameter_count() + 1;
unsigned output_offset = output_frame_size;
unsigned input_offset = input_frame_size;
for (int i = 0; i < parameter_count; ++i) {
output_offset -= kPointerSize;
DoTranslateCommand(iterator, frame_index, output_offset);
}
input_offset -= (parameter_count * kPointerSize);
// There are no translation commands for the caller's pc and fp, the
// context, and the function. Synthesize their values and set them up
// explicitly.
//
// The caller's pc for the bottommost output frame is the same as in the
// input frame. For all subsequent output frames, it can be read from the
// previous one. This frame's pc can be computed from the non-optimized
// function code and AST id of the bailout.
output_offset -= kPointerSize;
input_offset -= kPointerSize;
intptr_t value;
if (is_bottommost) {
value = input_->GetFrameSlot(input_offset);
} else {
value = output_[frame_index - 1]->GetPc();
}
output_frame->SetFrameSlot(output_offset, value);
if (FLAG_trace_deopt) {
PrintF(" 0x%08" V8PRIxPTR ": [top + %d] <- 0x%08"
V8PRIxPTR " ; caller's pc\n",
top_address + output_offset, output_offset, value);
}
// The caller's frame pointer for the bottommost output frame is the same
// as in the input frame. For all subsequent output frames, it can be
// read from the previous one. Also compute and set this frame's frame
// pointer.
output_offset -= kPointerSize;
input_offset -= kPointerSize;
if (is_bottommost) {
value = input_->GetFrameSlot(input_offset);
} else {
value = output_[frame_index - 1]->GetFp();
}
output_frame->SetFrameSlot(output_offset, value);
intptr_t fp_value = top_address + output_offset;
ASSERT(!is_bottommost || input_->GetRegister(rbp.code()) == fp_value);
output_frame->SetFp(fp_value);
if (is_topmost) output_frame->SetRegister(rbp.code(), fp_value);
if (FLAG_trace_deopt) {
PrintF(" 0x%08" V8PRIxPTR ": [top + %d] <- 0x%08"
V8PRIxPTR " ; caller's fp\n",
fp_value, output_offset, value);
}
// The context can be gotten from the function so long as we don't
// optimize functions that need local contexts.
output_offset -= kPointerSize;
input_offset -= kPointerSize;
value = reinterpret_cast<intptr_t>(function->context());
// The context for the bottommost output frame should also agree with the
// input frame.
ASSERT(!is_bottommost || input_->GetFrameSlot(input_offset) == value);
output_frame->SetFrameSlot(output_offset, value);
if (is_topmost) output_frame->SetRegister(rsi.code(), value);
if (FLAG_trace_deopt) {
PrintF(" 0x%08" V8PRIxPTR ": [top + %d] <- 0x%08"
V8PRIxPTR "; context\n",
top_address + output_offset, output_offset, value);
}
// The function was mentioned explicitly in the BEGIN_FRAME.
output_offset -= kPointerSize;
input_offset -= kPointerSize;
value = reinterpret_cast<intptr_t>(function);
// The function for the bottommost output frame should also agree with the
// input frame.
ASSERT(!is_bottommost || input_->GetFrameSlot(input_offset) == value);
output_frame->SetFrameSlot(output_offset, value);
if (FLAG_trace_deopt) {
PrintF(" 0x%08" V8PRIxPTR ": [top + %d] <- 0x%08"
V8PRIxPTR "; function\n",
top_address + output_offset, output_offset, value);
}
// Translate the rest of the frame.
for (unsigned i = 0; i < height; ++i) {
output_offset -= kPointerSize;
DoTranslateCommand(iterator, frame_index, output_offset);
}
ASSERT(0 == output_offset);
// Compute this frame's PC, state, and continuation.
Code* non_optimized_code = function->shared()->code();
FixedArray* raw_data = non_optimized_code->deoptimization_data();
DeoptimizationOutputData* data = DeoptimizationOutputData::cast(raw_data);
Address start = non_optimized_code->instruction_start();
unsigned pc_and_state = GetOutputInfo(data, node_id, function->shared());
unsigned pc_offset = FullCodeGenerator::PcField::decode(pc_and_state);
intptr_t pc_value = reinterpret_cast<intptr_t>(start + pc_offset);
output_frame->SetPc(pc_value);
FullCodeGenerator::State state =
FullCodeGenerator::StateField::decode(pc_and_state);
output_frame->SetState(Smi::FromInt(state));
// Set the continuation for the topmost frame.
if (is_topmost) {
Code* continuation = (bailout_type_ == EAGER)
? Builtins::builtin(Builtins::NotifyDeoptimized)
: Builtins::builtin(Builtins::NotifyLazyDeoptimized);
output_frame->SetContinuation(
reinterpret_cast<intptr_t>(continuation->entry()));
}
if (output_count_ - 1 == frame_index) iterator->Done();
}
#define __ masm()->
void Deoptimizer::EntryGenerator::Generate() {
// UNIMPLEMENTED, for now just return.
return;
GeneratePrologue();
CpuFeatures::Scope scope(SSE2);
// Save all general purpose registers before messing with them.
const int kNumberOfRegisters = Register::kNumRegisters;
const int kDoubleRegsSize = kDoubleSize *
XMMRegister::kNumAllocatableRegisters;
__ subq(rsp, Immediate(kDoubleRegsSize));
for (int i = 0; i < XMMRegister::kNumAllocatableRegisters; ++i) {
XMMRegister xmm_reg = XMMRegister::FromAllocationIndex(i);
int offset = i * kDoubleSize;
__ movsd(Operand(rsp, offset), xmm_reg);
}
// We push all registers onto the stack, even though we do not need
// to restore all later.
for (int i = 0; i < kNumberOfRegisters; i++) {
Register r = Register::toRegister(i);
__ push(r);
}
const int kSavedRegistersAreaSize = kNumberOfRegisters * kPointerSize +
kDoubleRegsSize;
// When calling new_deoptimizer_function we need to pass the last argument
// on the stack on windows and in r8 on linux. The remaining arguments are
// all passed in registers (different ones on linux and windows though).
#ifdef _WIN64
Register arg4 = r9;
Register arg3 = r8;
Register arg2 = rdx;
Register arg1 = rcx;
#else
Register arg4 = rcx;
Register arg3 = rdx;
Register arg2 = rsi;
Register arg1 = rdi;
#endif
// We use this to keep the value of the fifth argument temporarily.
// Unfortunately we can't store it directly in r8 (used for passing
// this on linux), since it is another parameter passing register on windows.
Register arg5 = r11;
// Get the bailout id from the stack.
__ movq(arg3, Operand(rsp, kSavedRegistersAreaSize));
// Get the address of the location in the code object if possible
// and compute the fp-to-sp delta in register arg5.
if (type() == EAGER) {
__ Set(arg4, 0);
__ lea(arg5, Operand(rsp, kSavedRegistersAreaSize + 1 * kPointerSize));
} else {
__ movq(arg4, Operand(rsp, kSavedRegistersAreaSize + 1 * kPointerSize));
__ lea(arg5, Operand(rsp, kSavedRegistersAreaSize + 2 * kPointerSize));
}
__ subq(arg5, rbp);
__ neg(arg5);
// Allocate a new deoptimizer object.
__ PrepareCallCFunction(5);
__ movq(rax, Operand(rbp, JavaScriptFrameConstants::kFunctionOffset));
__ movq(arg1, rax);
__ movq(arg2, Immediate(type()));
// Args 3 and 4 are already in the right registers.
// On windows put the argument on the stack (PrepareCallCFunction have
// created space for this). On linux pass the argument in r8.
#ifdef _WIN64
__ movq(Operand(rsp, 0 * kPointerSize), arg5);
#else
__ movq(r8, arg5);
#endif
__ CallCFunction(ExternalReference::new_deoptimizer_function(), 5);
// Preserve deoptimizer object in register rax and get the input
// frame descriptor pointer.
__ movq(rbx, Operand(rax, Deoptimizer::input_offset()));
// Fill in the input registers.
for (int i = kNumberOfRegisters -1; i >= 0; i--) {
int offset = (i * kPointerSize) + FrameDescription::registers_offset();
__ pop(Operand(rbx, offset));
// __ movq(rcx, Operand(rsp, (kNumberOfRegisters - 1 - i) * kPointerSize));
// __ movq(Operand(rbx, offset), rcx);
}
// Fill in the double input registers.
int double_regs_offset = FrameDescription::double_registers_offset();
for (int i = 0; i < XMMRegister::kNumAllocatableRegisters; i++) {
int dst_offset = i * kDoubleSize + double_regs_offset;
__ pop(Operand(rbx, dst_offset));
}
// Remove the bailout id and the general purpose registers from the stack.
if (type() == EAGER) {
__ addq(rsp, Immediate(kPointerSize));
} else {
__ addq(rsp, Immediate(2 * kPointerSize));
}
// Compute a pointer to the unwinding limit in register ecx; that is
// the first stack slot not part of the input frame.
__ movq(rcx, Operand(rbx, FrameDescription::frame_size_offset()));
__ addq(rcx, rsp);
// Unwind the stack down to - but not including - the unwinding
// limit and copy the contents of the activation frame to the input
// frame description.
__ lea(rdx, Operand(rbx, FrameDescription::frame_content_offset()));
Label pop_loop;
__ bind(&pop_loop);
__ pop(Operand(rdx, 0));
__ addq(rdx, Immediate(sizeof(intptr_t)));
__ cmpq(rcx, rsp);
__ j(not_equal, &pop_loop);
// Compute the output frame in the deoptimizer.
__ push(rax);
__ PrepareCallCFunction(1);
__ movq(arg1, rax);
__ CallCFunction(ExternalReference::compute_output_frames_function(), 1);
__ pop(rax);
// Replace the current frame with the output frames.
Label outer_push_loop, inner_push_loop;
// Outer loop state: rax = current FrameDescription**, rdx = one past the
// last FrameDescription**.
__ movl(rdx, Operand(rax, Deoptimizer::output_count_offset()));
__ movq(rax, Operand(rax, Deoptimizer::output_offset()));
__ lea(rdx, Operand(rax, rdx, times_8, 0));
__ bind(&outer_push_loop);
// Inner loop state: rbx = current FrameDescription*, rcx = loop index.
__ movq(rbx, Operand(rax, 0));
__ movq(rcx, Operand(rbx, FrameDescription::frame_size_offset()));
__ bind(&inner_push_loop);
__ subq(rcx, Immediate(sizeof(intptr_t)));
__ push(Operand(rbx, rcx, times_1, FrameDescription::frame_content_offset()));
__ testq(rcx, rcx);
__ j(not_zero, &inner_push_loop);
__ addq(rax, Immediate(kPointerSize));
__ cmpq(rax, rdx);
__ j(below, &outer_push_loop);
// In case of OSR, we have to restore the XMM registers.
if (type() == OSR) {
for (int i = 0; i < XMMRegister::kNumAllocatableRegisters; ++i) {
XMMRegister xmm_reg = XMMRegister::FromAllocationIndex(i);
int src_offset = i * kDoubleSize + double_regs_offset;
__ movsd(xmm_reg, Operand(rbx, src_offset));
}
}
// Push state, pc, and continuation from the last output frame.
if (type() != OSR) {
__ push(Operand(rbx, FrameDescription::state_offset()));
}
__ push(Operand(rbx, FrameDescription::pc_offset()));
__ push(Operand(rbx, FrameDescription::continuation_offset()));
// Push the registers from the last output frame.
for (int i = 0; i < kNumberOfRegisters; i++) {
int offset = (i * kPointerSize) + FrameDescription::registers_offset();
__ push(Operand(rbx, offset));
}
// Restore the registers from the stack.
for (int i = kNumberOfRegisters - 1; i >= 0 ; i--) {
Register r = Register::toRegister(i);
// Do not restore rsp, simply pop the value into the next register
// and overwrite this afterwards.
if (r.is(rsp)) {
ASSERT(i > 0);
r = Register::toRegister(i - 1);
}
__ pop(r);
}
// Set up the roots register.
ExternalReference roots_address = ExternalReference::roots_address();
__ movq(r13, roots_address);
__ movq(kSmiConstantRegister,
reinterpret_cast<uint64_t>(Smi::FromInt(kSmiConstantRegisterValue)),
RelocInfo::NONE);
// Return to the continuation point.
__ ret(0);
}
void Deoptimizer::TableEntryGenerator::GeneratePrologue() {
UNIMPLEMENTED();
// Create a sequence of deoptimization entries.
Label done;
for (int i = 0; i < count(); i++) {
int start = masm()->pc_offset();
USE(start);
__ push_imm32(i);
__ jmp(&done);
ASSERT(masm()->pc_offset() - start == table_entry_size_);
}
__ bind(&done);
}
#undef __
} } // namespace v8::internal
#endif // V8_TARGET_ARCH_X64

View File

@ -567,7 +567,21 @@ void LCodeGen::RegisterEnvironmentForDeoptimization(LEnvironment* environment) {
void LCodeGen::DeoptimizeIf(Condition cc, LEnvironment* environment) {
Abort("Unimplemented: %s", "Deoptimiz");
RegisterEnvironmentForDeoptimization(environment);
ASSERT(environment->HasBeenRegistered());
int id = environment->deoptimization_index();
Address entry = Deoptimizer::GetDeoptimizationEntry(id, Deoptimizer::EAGER);
ASSERT(entry != NULL);
if (entry == NULL) {
Abort("bailout was not prepared");
return;
}
if (cc == no_condition) {
__ Jump(entry, RelocInfo::RUNTIME_ENTRY);
} else {
__ j(cc, entry, RelocInfo::RUNTIME_ENTRY);
}
}

View File

@ -77,12 +77,9 @@ test-deoptimization/DeoptimizeCompare: FAIL
# Tests that time out with crankshaft.
test-api/Threading: SKIP
# BUG(1049): Currently no deoptimization support.
# BUG(1069): Context serialization fails on optimized functions.
test-serialize/ContextSerialization: SKIP
test-serialize/ContextDeserialization: SKIP
test-debug/BreakPointReturn: SKIP
test-debug/DebugStepLinearMixedICs: SKIP
test-debug/DebugConditional: SKIP
##############################################################################
[ $arch == arm ]

View File

@ -119,9 +119,6 @@ compiler/simple-osr: FAIL
# BUG (1026) This test is currently flaky.
compiler/simple-osr: SKIP
# BUG(1049): Currently no deoptimization support.
debug-liveedit-newsource: SKIP
debug-liveedit-1: SKIP
##############################################################################
[ $arch == mips ]