From df8548cd5b492e234f9ecc2e34a6024c3e39fe29 Mon Sep 17 00:00:00 2001 From: Seth Brenith Date: Thu, 2 May 2019 10:02:14 -0700 Subject: [PATCH] Touch guard pages when allocating stack frames On Windows, expanding the stack by more than 4 KB at a time can cause access violations. This change fixes a few known cases (and includes unit tests for those), and attempts to make stack expansion more consistent overall by using the AllocateStackSpace helper method everywhere we can, even when the offset is a small constant. On arm64, there was already a consistent method for stack pointer manipulation using the Claim and Drop methods, so Claim is updated to touch every page. Bug: v8:9017 Change-Id: I2dbbceeebbdefaf45803e9b621fe83f52234a395 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1570666 Commit-Queue: Seth Brenith Reviewed-by: Michael Starzinger Reviewed-by: Clemens Hammacher Reviewed-by: Jakob Gruber Cr-Commit-Position: refs/heads/master@{#61186} --- src/arm/assembler-arm-inl.h | 4 -- src/arm/assembler-arm.h | 5 +- src/arm/deoptimizer-arm.cc | 2 + src/arm/macro-assembler-arm.cc | 48 ++++++++++++-- src/arm/macro-assembler-arm.h | 13 ++++ src/arm64/macro-assembler-arm64-inl.h | 64 ++++++++++--------- src/arm64/macro-assembler-arm64.h | 13 +--- src/builtins/arm/builtins-arm.cc | 29 ++++++--- src/builtins/arm64/builtins-arm64.cc | 24 +------ src/builtins/ia32/builtins-ia32.cc | 23 +++++-- src/builtins/x64/builtins-x64.cc | 28 ++++---- .../backend/arm/code-generator-arm.cc | 6 +- .../backend/ia32/code-generator-ia32.cc | 32 +++++----- .../backend/x64/code-generator-x64.cc | 20 +++--- src/ia32/assembler-ia32.h | 2 +- src/ia32/deoptimizer-ia32.cc | 4 +- src/ia32/macro-assembler-ia32.cc | 32 ++++++---- src/ia32/macro-assembler-ia32.h | 18 +++--- src/regexp/arm/regexp-macro-assembler-arm.cc | 4 +- .../ia32/regexp-macro-assembler-ia32.cc | 14 +--- src/regexp/x64/regexp-macro-assembler-x64.cc | 14 +--- src/turbo-assembler.h | 6 ++ src/wasm/baseline/arm/liftoff-assembler-arm.h | 26 +++++++- .../baseline/arm64/liftoff-assembler-arm64.h | 20 ++++++ .../baseline/ia32/liftoff-assembler-ia32.h | 20 +++--- src/wasm/baseline/x64/liftoff-assembler-x64.h | 36 +++++++++-- src/x64/assembler-x64.cc | 40 +++++++----- src/x64/assembler-x64.h | 4 ++ src/x64/deoptimizer-x64.cc | 4 +- src/x64/macro-assembler-x64.cc | 40 ++++++++++-- src/x64/macro-assembler-x64.h | 13 ++++ test/cctest/test-assembler-arm64.cc | 4 -- test/cctest/test-assembler-ia32.cc | 4 +- test/cctest/test-assembler-x64.cc | 6 +- test/cctest/test-macro-assembler-x64.cc | 8 +-- test/mjsunit/compiler/regress-9017.js | 39 +++++++++++ test/mjsunit/mjsunit.status | 1 + test/mjsunit/regress/regress-9017.js | 15 +++++ test/mjsunit/regress/wasm/regress-9017.js | 38 +++++++++++ 39 files changed, 488 insertions(+), 235 deletions(-) create mode 100644 test/mjsunit/compiler/regress-9017.js create mode 100644 test/mjsunit/regress/regress-9017.js create mode 100644 test/mjsunit/regress/wasm/regress-9017.js diff --git a/src/arm/assembler-arm-inl.h b/src/arm/assembler-arm-inl.h index a1fda77c93..20e256356e 100644 --- a/src/arm/assembler-arm-inl.h +++ b/src/arm/assembler-arm-inl.h @@ -188,10 +188,6 @@ Handle Assembler::relative_code_target_object_handle_at( return GetCodeTarget(code_target_index); } -Operand::Operand(int32_t immediate, RelocInfo::Mode rmode) : rmode_(rmode) { - value_.immediate = immediate; -} - Operand Operand::Zero() { return Operand(static_cast(0)); } Operand::Operand(const ExternalReference& f) diff --git a/src/arm/assembler-arm.h 
b/src/arm/assembler-arm.h index 852b983ac4..0599a25120 100644 --- a/src/arm/assembler-arm.h +++ b/src/arm/assembler-arm.h @@ -83,7 +83,10 @@ class V8_EXPORT_PRIVATE Operand { public: // immediate V8_INLINE explicit Operand(int32_t immediate, - RelocInfo::Mode rmode = RelocInfo::NONE); + RelocInfo::Mode rmode = RelocInfo::NONE) + : rmode_(rmode) { + value_.immediate = immediate; + } V8_INLINE static Operand Zero(); V8_INLINE explicit Operand(const ExternalReference& f); explicit Operand(Handle handle); diff --git a/src/arm/deoptimizer-arm.cc b/src/arm/deoptimizer-arm.cc index edfb9c6096..402dc8d520 100644 --- a/src/arm/deoptimizer-arm.cc +++ b/src/arm/deoptimizer-arm.cc @@ -44,6 +44,8 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm, // Push registers d0-d15, and possibly d16-d31, on the stack. // If d16-d31 are not pushed, decrease the stack pointer instead. __ vstm(db_w, sp, d16, d31, ne); + // Okay to not call AllocateStackSpace here because the size is a known + // small number and we need to use condition codes. __ sub(sp, sp, Operand(16 * kDoubleSize), LeaveCC, eq); __ vstm(db_w, sp, d0, d15); diff --git a/src/arm/macro-assembler-arm.cc b/src/arm/macro-assembler-arm.cc index 48ffd27c20..3ca7a087d1 100644 --- a/src/arm/macro-assembler-arm.cc +++ b/src/arm/macro-assembler-arm.cc @@ -859,7 +859,7 @@ void MacroAssembler::PushSafepointRegisters() { // stack, so adjust the stack for unsaved registers. const int num_unsaved = kNumSafepointRegisters - kNumSafepointSavedRegisters; DCHECK_GE(num_unsaved, 0); - sub(sp, sp, Operand(num_unsaved * kPointerSize)); + AllocateStackSpace(num_unsaved * kPointerSize); stm(db_w, sp, kSafepointSavedRegisters); } @@ -1322,6 +1322,44 @@ int TurboAssembler::LeaveFrame(StackFrame::Type type) { return frame_ends; } +#ifdef V8_OS_WIN +void TurboAssembler::AllocateStackSpace(Register bytes_scratch) { + // "Functions that allocate 4 KB or more on the stack must ensure that each + // page prior to the final page is touched in order." Source: + // https://docs.microsoft.com/en-us/cpp/build/overview-of-arm-abi-conventions?view=vs-2019#stack + UseScratchRegisterScope temps(this); + DwVfpRegister scratch = temps.AcquireD(); + Label check_offset; + Label touch_next_page; + jmp(&check_offset); + bind(&touch_next_page); + sub(sp, sp, Operand(kStackPageSize)); + // Just to touch the page, before we increment further. + vldr(scratch, MemOperand(sp)); + sub(bytes_scratch, bytes_scratch, Operand(kStackPageSize)); + + bind(&check_offset); + cmp(bytes_scratch, Operand(kStackPageSize)); + b(gt, &touch_next_page); + + sub(sp, sp, bytes_scratch); +} + +void TurboAssembler::AllocateStackSpace(int bytes) { + UseScratchRegisterScope temps(this); + DwVfpRegister scratch = no_dreg; + while (bytes > kStackPageSize) { + if (scratch == no_dreg) { + scratch = temps.AcquireD(); + } + sub(sp, sp, Operand(kStackPageSize)); + vldr(scratch, MemOperand(sp)); + bytes -= kStackPageSize; + } + sub(sp, sp, Operand(bytes)); +} +#endif + void MacroAssembler::EnterExitFrame(bool save_doubles, int stack_space, StackFrame::Type frame_type) { DCHECK(frame_type == StackFrame::EXIT || @@ -1362,7 +1400,7 @@ void MacroAssembler::EnterExitFrame(bool save_doubles, int stack_space, // Reserve place for the return address and stack space and align the frame // preparing for calling the runtime function. 
const int frame_alignment = MacroAssembler::ActivationFrameAlignment(); - sub(sp, sp, Operand((stack_space + 1) * kPointerSize)); + AllocateStackSpace((stack_space + 1) * kPointerSize); if (frame_alignment > 0) { DCHECK(base::bits::IsPowerOfTwo(frame_alignment)); and_(sp, sp, Operand(-frame_alignment)); @@ -1824,7 +1862,7 @@ void TurboAssembler::TruncateDoubleToI(Isolate* isolate, Zone* zone, // If we fell through then inline version didn't succeed - call stub instead. push(lr); - sub(sp, sp, Operand(kDoubleSize)); // Put input on stack. + AllocateStackSpace(kDoubleSize); // Put input on stack. vstr(double_input, MemOperand(sp, 0)); if (stub_mode == StubCallMode::kCallWasmRuntimeStub) { @@ -2353,12 +2391,12 @@ void TurboAssembler::PrepareCallCFunction(int num_reg_arguments, // Make stack end at alignment and make room for num_arguments - 4 words // and the original value of sp. mov(scratch, sp); - sub(sp, sp, Operand((stack_passed_arguments + 1) * kPointerSize)); + AllocateStackSpace((stack_passed_arguments + 1) * kPointerSize); DCHECK(base::bits::IsPowerOfTwo(frame_alignment)); and_(sp, sp, Operand(-frame_alignment)); str(scratch, MemOperand(sp, stack_passed_arguments * kPointerSize)); } else if (stack_passed_arguments > 0) { - sub(sp, sp, Operand(stack_passed_arguments * kPointerSize)); + AllocateStackSpace(stack_passed_arguments * kPointerSize); } } diff --git a/src/arm/macro-assembler-arm.h b/src/arm/macro-assembler-arm.h index fe08c4dc51..0d6a2450ad 100644 --- a/src/arm/macro-assembler-arm.h +++ b/src/arm/macro-assembler-arm.h @@ -52,6 +52,19 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { // Returns the pc offset at which the frame ends. int LeaveFrame(StackFrame::Type type); +// Allocate stack space of given size (i.e. decrement {sp} by the value +// stored in the given register, or by a constant). If you need to perform a +// stack check, do it before calling this function because this function may +// write into the newly allocated space. It may also overwrite the given +// register's value, in the version that takes a register. +#ifdef V8_OS_WIN + void AllocateStackSpace(Register bytes_scratch); + void AllocateStackSpace(int bytes); +#else + void AllocateStackSpace(Register bytes) { sub(sp, sp, bytes); } + void AllocateStackSpace(int bytes) { sub(sp, sp, Operand(bytes)); } +#endif + // Push a fixed frame, consisting of lr, fp void PushCommonFrame(Register marker_reg = no_reg); diff --git a/src/arm64/macro-assembler-arm64-inl.h b/src/arm64/macro-assembler-arm64-inl.h index be6cd4c933..d72f154891 100644 --- a/src/arm64/macro-assembler-arm64-inl.h +++ b/src/arm64/macro-assembler-arm64-inl.h @@ -1191,7 +1191,13 @@ void TurboAssembler::Claim(int64_t count, uint64_t unit_size) { return; } DCHECK_EQ(size % 16, 0); - +#if V8_OS_WIN + while (size > kStackPageSize) { + Sub(sp, sp, kStackPageSize); + Str(xzr, MemOperand(sp)); + size -= kStackPageSize; + } +#endif Sub(sp, sp, size); } @@ -1207,22 +1213,33 @@ void TurboAssembler::Claim(const Register& count, uint64_t unit_size) { } AssertPositiveOrZero(count); +#if V8_OS_WIN + // "Functions that allocate 4k or more worth of stack must ensure that each + // page prior to the final page is touched in order." Source: + // https://docs.microsoft.com/en-us/cpp/build/arm64-windows-abi-conventions?view=vs-2019#stack + + // Callers expect count register to not be clobbered, so copy it. 
+ UseScratchRegisterScope temps(this); + Register bytes_scratch = temps.AcquireX(); + Mov(bytes_scratch, size); + + Label check_offset; + Label touch_next_page; + B(&check_offset); + Bind(&touch_next_page); + Sub(sp, sp, kStackPageSize); + // Just to touch the page, before we increment further. + Str(xzr, MemOperand(sp)); + Sub(bytes_scratch, bytes_scratch, kStackPageSize); + + Bind(&check_offset); + Cmp(bytes_scratch, kStackPageSize); + B(gt, &touch_next_page); + + Sub(sp, sp, bytes_scratch); +#else Sub(sp, sp, size); -} - - -void MacroAssembler::ClaimBySMI(const Register& count_smi, uint64_t unit_size) { - DCHECK(unit_size == 0 || base::bits::IsPowerOfTwo(unit_size)); - const int shift = CountTrailingZeros(unit_size, kXRegSizeInBits) - kSmiShift; - const Operand size(count_smi, - (shift >= 0) ? (LSL) : (LSR), - (shift >= 0) ? (shift) : (-shift)); - - if (size.IsZero()) { - return; - } - - Sub(sp, sp, size); +#endif } void TurboAssembler::Drop(int64_t count, uint64_t unit_size) { @@ -1280,21 +1297,6 @@ void TurboAssembler::DropSlots(int64_t count) { void TurboAssembler::PushArgument(const Register& arg) { Push(padreg, arg); } -void MacroAssembler::DropBySMI(const Register& count_smi, uint64_t unit_size) { - DCHECK(unit_size == 0 || base::bits::IsPowerOfTwo(unit_size)); - const int shift = CountTrailingZeros(unit_size, kXRegSizeInBits) - kSmiShift; - const Operand size(count_smi, - (shift >= 0) ? (LSL) : (LSR), - (shift >= 0) ? (shift) : (-shift)); - - if (size.IsZero()) { - return; - } - - Add(sp, sp, size); -} - - void MacroAssembler::CompareAndBranch(const Register& lhs, const Operand& rhs, Condition cond, diff --git a/src/arm64/macro-assembler-arm64.h b/src/arm64/macro-assembler-arm64.h index c522ec372a..2755e56aba 100644 --- a/src/arm64/macro-assembler-arm64.h +++ b/src/arm64/macro-assembler-arm64.h @@ -646,10 +646,10 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { // Load a literal from the inline constant pool. inline void Ldr(const CPURegister& rt, const Operand& imm); - // Claim or drop stack space without actually accessing memory. + // Claim or drop stack space. // - // In debug mode, both of these will write invalid data into the claimed or - // dropped space. + // On Windows, Claim will write a value every 4k, as is required by the stack + // expansion mechanism. // // The stack pointer must be aligned to 16 bytes and the size claimed or // dropped must be a multiple of 16 bytes. @@ -1617,13 +1617,6 @@ class V8_EXPORT_PRIVATE MacroAssembler : public TurboAssembler { // be aligned to 16 bytes. void PeekPair(const CPURegister& dst1, const CPURegister& dst2, int offset); - // Variants of Claim and Drop, where the 'count' parameter is a SMI held in a - // register. - inline void ClaimBySMI(const Register& count_smi, - uint64_t unit_size = kXRegSize); - inline void DropBySMI(const Register& count_smi, - uint64_t unit_size = kXRegSize); - // Compare a register with an operand, and branch to label depending on the // condition. May corrupt the status flags. inline void CompareAndBranch(const Register& lhs, diff --git a/src/builtins/arm/builtins-arm.cc b/src/builtins/arm/builtins-arm.cc index ff87055c66..6f8ae8a956 100644 --- a/src/builtins/arm/builtins-arm.cc +++ b/src/builtins/arm/builtins-arm.cc @@ -2079,17 +2079,27 @@ void Generate_PushBoundArguments(MacroAssembler* masm) { // -- r4 : the number of [[BoundArguments]] // ----------------------------------- - // Reserve stack space for the [[BoundArguments]]. 
+ Register scratch = r6; + { - Label done; - __ sub(sp, sp, Operand(r4, LSL, kPointerSizeLog2)); // Check the stack for overflow. We are not trying to catch interruptions // (i.e. debug break and preemption) here, so check the "real stack // limit". - __ CompareRoot(sp, RootIndex::kRealStackLimit); - __ b(hs, &done); - // Restore the stack pointer. - __ add(sp, sp, Operand(r4, LSL, kPointerSizeLog2)); + Label done; + __ mov(scratch, Operand(r4, LSL, kPointerSizeLog2)); + { + UseScratchRegisterScope temps(masm); + Register remaining_stack_size = temps.Acquire(); + + // Compute the space we have left. The stack might already be overflowed + // here which will cause remaining_stack_size to become negative. + __ LoadRoot(remaining_stack_size, RootIndex::kRealStackLimit); + __ sub(remaining_stack_size, sp, remaining_stack_size); + + // Check if the arguments will overflow the stack. + __ cmp(remaining_stack_size, scratch); + } + __ b(gt, &done); { FrameScope scope(masm, StackFrame::MANUAL); __ EnterFrame(StackFrame::INTERNAL); @@ -2098,7 +2108,8 @@ void Generate_PushBoundArguments(MacroAssembler* masm) { __ bind(&done); } - Register scratch = r6; + // Reserve stack space for the [[BoundArguments]]. + __ AllocateStackSpace(scratch); // Relocate arguments down the stack. { @@ -2976,7 +2987,7 @@ void Builtins::Generate_CallApiCallback(MacroAssembler* masm) { // sp[5 * kPointerSize]: undefined (kNewTarget) // Reserve space on the stack. - __ sub(sp, sp, Operand(FCA::kArgsLength * kPointerSize)); + __ AllocateStackSpace(FCA::kArgsLength * kPointerSize); // kHolder. __ str(holder, MemOperand(sp, 0 * kPointerSize)); diff --git a/src/builtins/arm64/builtins-arm64.cc b/src/builtins/arm64/builtins-arm64.cc index 8d091131af..390b76a9d6 100644 --- a/src/builtins/arm64/builtins-arm64.cc +++ b/src/builtins/arm64/builtins-arm64.cc @@ -108,26 +108,6 @@ void Generate_StackOverflowCheck(MacroAssembler* masm, Register num_args, // Check if the arguments will overflow the stack. __ Cmp(scratch, Operand(num_args, LSL, kSystemPointerSizeLog2)); __ B(le, stack_overflow); - -#if defined(V8_OS_WIN) - // Simulate _chkstk to extend stack guard page on Windows ARM64. - const int kPageSize = 4096; - Label chkstk, chkstk_done; - Register probe = temps.AcquireX(); - - __ Sub(scratch, sp, Operand(num_args, LSL, kSystemPointerSizeLog2)); - __ Mov(probe, sp); - - // Loop start of stack probe. - __ Bind(&chkstk); - __ Sub(probe, probe, kPageSize); - __ Cmp(probe, scratch); - __ B(lo, &chkstk_done); - __ Ldrb(xzr, MemOperand(probe)); - __ B(&chkstk); - - __ Bind(&chkstk_done); -#endif } void Generate_JSBuiltinsConstructStubHelper(MacroAssembler* masm) { @@ -2500,7 +2480,7 @@ void Generate_PushBoundArguments(MacroAssembler* masm) { __ Sub(x10, sp, x10); // Check if the arguments will overflow the stack. __ Cmp(x10, Operand(bound_argc, LSL, kSystemPointerSizeLog2)); - __ B(hs, &done); + __ B(gt, &done); __ TailCallRuntime(Runtime::kThrowStackOverflow); __ Bind(&done); } @@ -3594,7 +3574,7 @@ void Builtins::Generate_CallApiCallback(MacroAssembler* masm) { // sp[5 * kSystemPointerSize]: undefined (kNewTarget) // Reserve space on the stack. - __ Sub(sp, sp, Operand(FCA::kArgsLength * kSystemPointerSize)); + __ Claim(FCA::kArgsLength, kSystemPointerSize); // kHolder. 
__ Str(holder, MemOperand(sp, 0 * kSystemPointerSize)); diff --git a/src/builtins/ia32/builtins-ia32.cc b/src/builtins/ia32/builtins-ia32.cc index 1b51ca4238..8d70e2a139 100644 --- a/src/builtins/ia32/builtins-ia32.cc +++ b/src/builtins/ia32/builtins-ia32.cc @@ -405,7 +405,7 @@ void Generate_JSEntryVariant(MacroAssembler* masm, StackFrame::Type type, __ push(Immediate(StackFrame::TypeToMarker(type))); // Reserve a slot for the context. It is filled after the root register has // been set up. - __ sub(esp, Immediate(kSystemPointerSize)); + __ AllocateStackSpace(kSystemPointerSize); // Save callee-saved registers (C calling conventions). __ push(edi); __ push(esi); @@ -1243,7 +1243,7 @@ void Generate_InterpreterPushZeroAndArgsAndReturnAddress( __ lea(scratch1, Operand(num_args, times_system_pointer_size, kSystemPointerSize)); - __ AllocateStackFrame(scratch1); + __ AllocateStackSpace(scratch1); // Step 2 move return_address and slots around it to the correct locations. // Move from top to bottom, otherwise we may overwrite when num_args = 0 or 1, @@ -2220,7 +2220,7 @@ void Generate_PushBoundArguments(MacroAssembler* masm) { { Label done; __ lea(ecx, Operand(edx, times_system_pointer_size, 0)); - __ sub(esp, ecx); + __ sub(esp, ecx); // Not Windows-friendly, but corrected below. // Check the stack for overflow. We are not trying to catch interruptions // (i.e. debug break and preemption) here, so check the "real stack // limit". @@ -2236,6 +2236,19 @@ void Generate_PushBoundArguments(MacroAssembler* masm) { __ bind(&done); } +#if V8_OS_WIN + // Correctly allocate the stack space that was checked above. + { + Label win_done; + __ cmp(ecx, TurboAssemblerBase::kStackPageSize); + __ j(less_equal, &win_done, Label::kNear); + // Reset esp and walk through the range touching every page. + __ lea(esp, Operand(esp, edx, times_system_pointer_size, 0)); + __ AllocateStackSpace(ecx); + __ bind(&win_done); + } +#endif + // Adjust effective number of arguments to include return address. __ inc(eax); @@ -2649,7 +2662,7 @@ void Builtins::Generate_WasmCompileLazy(MacroAssembler* masm) { static_assert(WasmCompileLazyFrameConstants::kNumberOfSavedFpParamRegs == arraysize(wasm::kFpParamRegisters), "frame size mismatch"); - __ sub(esp, Immediate(kSimd128Size * arraysize(wasm::kFpParamRegisters))); + __ AllocateStackSpace(kSimd128Size * arraysize(wasm::kFpParamRegisters)); int offset = 0; for (DoubleRegister reg : wasm::kFpParamRegisters) { __ movdqu(Operand(esp, offset), reg); @@ -2883,7 +2896,7 @@ void Builtins::Generate_DoubleToI(MacroAssembler* masm) { if (CpuFeatures::IsSupported(SSE3)) { CpuFeatureScope scope(masm, SSE3); // Reserve space for 64 bit answer. - __ sub(esp, Immediate(kDoubleSize)); // Nolint. + __ AllocateStackSpace(kDoubleSize); // Nolint. // Do conversion, which cannot fail because we checked the exponent. __ fisttp_d(Operand(esp, 0)); __ mov(result_reg, Operand(esp, 0)); // Load low word of answer as result diff --git a/src/builtins/x64/builtins-x64.cc b/src/builtins/x64/builtins-x64.cc index a492bb9b09..6e251182e9 100644 --- a/src/builtins/x64/builtins-x64.cc +++ b/src/builtins/x64/builtins-x64.cc @@ -390,7 +390,7 @@ void Generate_JSEntryVariant(MacroAssembler* masm, StackFrame::Type type, __ Push(Immediate(StackFrame::TypeToMarker(type))); // Reserve a slot for the context. It is filled after the root register has // been set up. - __ subq(rsp, Immediate(kSystemPointerSize)); + __ AllocateStackSpace(kSystemPointerSize); // Save callee-saved registers (X64/X32/Win64 calling conventions). 
__ pushq(r12); __ pushq(r13); @@ -404,7 +404,7 @@ void Generate_JSEntryVariant(MacroAssembler* masm, StackFrame::Type type, #ifdef _WIN64 // On Win64 XMM6-XMM15 are callee-save. - __ subq(rsp, Immediate(EntryFrameConstants::kXMMRegistersBlockSize)); + __ AllocateStackSpace(EntryFrameConstants::kXMMRegistersBlockSize); __ movdqu(Operand(rsp, EntryFrameConstants::kXMMRegisterSize * 0), xmm6); __ movdqu(Operand(rsp, EntryFrameConstants::kXMMRegisterSize * 1), xmm7); __ movdqu(Operand(rsp, EntryFrameConstants::kXMMRegisterSize * 2), xmm8); @@ -2332,18 +2332,16 @@ void Generate_PushBoundArguments(MacroAssembler* masm) { // -- rbx : the number of [[BoundArguments]] (checked to be non-zero) // ----------------------------------- - // Reserve stack space for the [[BoundArguments]]. + // Check the stack for overflow. { Label done; - __ leaq(kScratchRegister, Operand(rbx, times_system_pointer_size, 0)); - __ subq(rsp, kScratchRegister); - // Check the stack for overflow. We are not trying to catch interruptions - // (i.e. debug break and preemption) here, so check the "real stack - // limit". - __ CompareRoot(rsp, RootIndex::kRealStackLimit); + __ shlq(rbx, Immediate(kSystemPointerSizeLog2)); + __ movq(kScratchRegister, rsp); + __ subq(kScratchRegister, rbx); + // We are not trying to catch interruptions (i.e. debug break and + // preemption) here, so check the "real stack limit". + __ CompareRoot(kScratchRegister, RootIndex::kRealStackLimit); __ j(above_equal, &done, Label::kNear); - // Restore the stack pointer. - __ leaq(rsp, Operand(rsp, rbx, times_system_pointer_size, 0)); { FrameScope scope(masm, StackFrame::MANUAL); __ EnterFrame(StackFrame::INTERNAL); @@ -2352,6 +2350,10 @@ void Generate_PushBoundArguments(MacroAssembler* masm) { __ bind(&done); } + // Reserve stack space for the [[BoundArguments]]. + __ movq(kScratchRegister, rbx); + __ AllocateStackSpace(kScratchRegister); + // Adjust effective number of arguments to include return address. 
__ incl(rax); @@ -2359,7 +2361,7 @@ void Generate_PushBoundArguments(MacroAssembler* masm) { { Label loop; __ Set(rcx, 0); - __ leaq(rbx, Operand(rsp, rbx, times_system_pointer_size, 0)); + __ addq(rbx, rsp); __ bind(&loop); __ movq(kScratchRegister, Operand(rbx, rcx, times_system_pointer_size, 0)); @@ -2648,7 +2650,7 @@ void Builtins::Generate_WasmCompileLazy(MacroAssembler* masm) { static_assert(WasmCompileLazyFrameConstants::kNumberOfSavedFpParamRegs == arraysize(wasm::kFpParamRegisters), "frame size mismatch"); - __ subq(rsp, Immediate(kSimd128Size * arraysize(wasm::kFpParamRegisters))); + __ AllocateStackSpace(kSimd128Size * arraysize(wasm::kFpParamRegisters)); int offset = 0; for (DoubleRegister reg : wasm::kFpParamRegisters) { __ movdqu(Operand(rsp, offset), reg); diff --git a/src/compiler/backend/arm/code-generator-arm.cc b/src/compiler/backend/arm/code-generator-arm.cc index 06dc912258..53e3b3efd0 100644 --- a/src/compiler/backend/arm/code-generator-arm.cc +++ b/src/compiler/backend/arm/code-generator-arm.cc @@ -571,7 +571,7 @@ void AdjustStackPointerForTailCall( if (pending_pushes != nullptr) { FlushPendingPushRegisters(tasm, state, pending_pushes); } - tasm->sub(sp, sp, Operand(stack_slot_delta * kSystemPointerSize)); + tasm->AllocateStackSpace(stack_slot_delta * kSystemPointerSize); state->IncreaseSPDelta(stack_slot_delta); } else if (allow_shrinkage && stack_slot_delta < 0) { if (pending_pushes != nullptr) { @@ -3078,7 +3078,7 @@ void CodeGenerator::AssembleConstructFrame() { required_slots -= frame()->GetReturnSlotCount(); required_slots -= 2 * base::bits::CountPopulation(saves_fp); if (required_slots > 0) { - __ sub(sp, sp, Operand(required_slots * kSystemPointerSize)); + __ AllocateStackSpace(required_slots * kSystemPointerSize); } } @@ -3100,7 +3100,7 @@ void CodeGenerator::AssembleConstructFrame() { const int returns = frame()->GetReturnSlotCount(); if (returns != 0) { // Create space for returns. - __ sub(sp, sp, Operand(returns * kSystemPointerSize)); + __ AllocateStackSpace(returns * kSystemPointerSize); } } diff --git a/src/compiler/backend/ia32/code-generator-ia32.cc b/src/compiler/backend/ia32/code-generator-ia32.cc index c4ca35472e..d206a6d285 100644 --- a/src/compiler/backend/ia32/code-generator-ia32.cc +++ b/src/compiler/backend/ia32/code-generator-ia32.cc @@ -247,7 +247,7 @@ class OutOfLineTruncateDoubleToI final : public OutOfLineCode { zone_(gen->zone()) {} void Generate() final { - __ sub(esp, Immediate(kDoubleSize)); + __ AllocateStackSpace(kDoubleSize); __ movsd(MemOperand(esp, 0), input_); if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) { // A direct call to a wasm runtime stub defined in this module. @@ -360,7 +360,7 @@ class OutOfLineRecordWrite final : public OutOfLineCode { __ CallCFunction(ExternalReference::ieee754_##name##_function(), 4); \ /* Return value is in st(0) on ia32. */ \ /* Store it into the result register. */ \ - __ sub(esp, Immediate(kDoubleSize)); \ + __ AllocateStackSpace(kDoubleSize); \ __ fstp_d(Operand(esp, 0)); \ __ movsd(i.OutputDoubleRegister(), Operand(esp, 0)); \ __ add(esp, Immediate(kDoubleSize)); \ @@ -374,7 +374,7 @@ class OutOfLineRecordWrite final : public OutOfLineCode { __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \ /* Return value is in st(0) on ia32. */ \ /* Store it into the result register. 
*/ \ - __ sub(esp, Immediate(kDoubleSize)); \ + __ AllocateStackSpace(kDoubleSize); \ __ fstp_d(Operand(esp, 0)); \ __ movsd(i.OutputDoubleRegister(), Operand(esp, 0)); \ __ add(esp, Immediate(kDoubleSize)); \ @@ -522,7 +522,7 @@ void AdjustStackPointerForTailCall(TurboAssembler* tasm, StandardFrameConstants::kFixedSlotCountAboveFp; int stack_slot_delta = new_slot_above_sp - current_sp_offset; if (stack_slot_delta > 0) { - tasm->sub(esp, Immediate(stack_slot_delta * kSystemPointerSize)); + tasm->AllocateStackSpace(stack_slot_delta * kSystemPointerSize); state->IncreaseSPDelta(stack_slot_delta); } else if (allow_shrinkage && stack_slot_delta < 0) { tasm->add(esp, Immediate(-stack_slot_delta * kSystemPointerSize)); @@ -1366,7 +1366,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kSSEFloat64Mod: { Register tmp = i.TempRegister(1); __ mov(tmp, esp); - __ sub(esp, Immediate(kDoubleSize)); + __ AllocateStackSpace(kDoubleSize); __ and_(esp, -8); // align to 8 byte boundary. // Move values to st(0) and st(1). __ movsd(Operand(esp, 0), i.InputDoubleRegister(1)); @@ -1687,45 +1687,45 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kIA32PushFloat32: if (instr->InputAt(0)->IsFPRegister()) { - __ sub(esp, Immediate(kFloatSize)); + __ AllocateStackSpace(kFloatSize); __ movss(Operand(esp, 0), i.InputDoubleRegister(0)); frame_access_state()->IncreaseSPDelta(kFloatSize / kSystemPointerSize); } else if (HasImmediateInput(instr, 0)) { __ Move(kScratchDoubleReg, i.InputFloat32(0)); - __ sub(esp, Immediate(kFloatSize)); + __ AllocateStackSpace(kFloatSize); __ movss(Operand(esp, 0), kScratchDoubleReg); frame_access_state()->IncreaseSPDelta(kFloatSize / kSystemPointerSize); } else { __ movss(kScratchDoubleReg, i.InputOperand(0)); - __ sub(esp, Immediate(kFloatSize)); + __ AllocateStackSpace(kFloatSize); __ movss(Operand(esp, 0), kScratchDoubleReg); frame_access_state()->IncreaseSPDelta(kFloatSize / kSystemPointerSize); } break; case kIA32PushFloat64: if (instr->InputAt(0)->IsFPRegister()) { - __ sub(esp, Immediate(kDoubleSize)); + __ AllocateStackSpace(kDoubleSize); __ movsd(Operand(esp, 0), i.InputDoubleRegister(0)); frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize); } else if (HasImmediateInput(instr, 0)) { __ Move(kScratchDoubleReg, i.InputDouble(0)); - __ sub(esp, Immediate(kDoubleSize)); + __ AllocateStackSpace(kDoubleSize); __ movsd(Operand(esp, 0), kScratchDoubleReg); frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize); } else { __ movsd(kScratchDoubleReg, i.InputOperand(0)); - __ sub(esp, Immediate(kDoubleSize)); + __ AllocateStackSpace(kDoubleSize); __ movsd(Operand(esp, 0), kScratchDoubleReg); frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize); } break; case kIA32PushSimd128: if (instr->InputAt(0)->IsFPRegister()) { - __ sub(esp, Immediate(kSimd128Size)); + __ AllocateStackSpace(kSimd128Size); __ movups(Operand(esp, 0), i.InputSimd128Register(0)); } else { __ movups(kScratchDoubleReg, i.InputOperand(0)); - __ sub(esp, Immediate(kSimd128Size)); + __ AllocateStackSpace(kSimd128Size); __ movups(Operand(esp, 0), kScratchDoubleReg); } frame_access_state()->IncreaseSPDelta(kSimd128Size / kSystemPointerSize); @@ -1737,7 +1737,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ push(operand); frame_access_state()->IncreaseSPDelta(kFloatSize / kSystemPointerSize); } else if (instr->InputAt(0)->IsFPRegister()) { - __ sub(esp, Immediate(kFloatSize)); + __ 
AllocateStackSpace(kFloatSize); __ movsd(Operand(esp, 0), i.InputDoubleRegister(0)); frame_access_state()->IncreaseSPDelta(kFloatSize / kSystemPointerSize); } else if (HasImmediateInput(instr, 0)) { @@ -4291,7 +4291,7 @@ void CodeGenerator::AssembleConstructFrame() { required_slots -= base::bits::CountPopulation(saves); required_slots -= frame()->GetReturnSlotCount(); if (required_slots > 0) { - __ sub(esp, Immediate(required_slots * kSystemPointerSize)); + __ AllocateStackSpace(required_slots * kSystemPointerSize); } } @@ -4304,7 +4304,7 @@ void CodeGenerator::AssembleConstructFrame() { // Allocate return slots (located after callee-saved). if (frame()->GetReturnSlotCount() > 0) { - __ sub(esp, Immediate(frame()->GetReturnSlotCount() * kSystemPointerSize)); + __ AllocateStackSpace(frame()->GetReturnSlotCount() * kSystemPointerSize); } } diff --git a/src/compiler/backend/x64/code-generator-x64.cc b/src/compiler/backend/x64/code-generator-x64.cc index 87e5fbf2fe..a6902ac1b6 100644 --- a/src/compiler/backend/x64/code-generator-x64.cc +++ b/src/compiler/backend/x64/code-generator-x64.cc @@ -201,7 +201,7 @@ class OutOfLineTruncateDoubleToI final : public OutOfLineCode { zone_(gen->zone()) {} void Generate() final { - __ subq(rsp, Immediate(kDoubleSize)); + __ AllocateStackSpace(kDoubleSize); unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(), kDoubleSize); __ Movsd(MemOperand(rsp, 0), input_); @@ -615,7 +615,7 @@ void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg, namespace { -void AdjustStackPointerForTailCall(Assembler* assembler, +void AdjustStackPointerForTailCall(TurboAssembler* assembler, FrameAccessState* state, int new_slot_above_sp, bool allow_shrinkage = true) { @@ -623,7 +623,7 @@ void AdjustStackPointerForTailCall(Assembler* assembler, StandardFrameConstants::kFixedSlotCountAboveFp; int stack_slot_delta = new_slot_above_sp - current_sp_offset; if (stack_slot_delta > 0) { - assembler->subq(rsp, Immediate(stack_slot_delta * kSystemPointerSize)); + assembler->AllocateStackSpace(stack_slot_delta * kSystemPointerSize); state->IncreaseSPDelta(stack_slot_delta); } else if (allow_shrinkage && stack_slot_delta < 0) { assembler->addq(rsp, Immediate(-stack_slot_delta * kSystemPointerSize)); @@ -1360,7 +1360,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister()); break; case kSSEFloat64Mod: { - __ subq(rsp, Immediate(kDoubleSize)); + __ AllocateStackSpace(kDoubleSize); unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(), kDoubleSize); // Move values to st(0) and st(1). @@ -2178,14 +2178,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } else if (instr->InputAt(0)->IsFloatRegister() || instr->InputAt(0)->IsDoubleRegister()) { // TODO(titzer): use another machine instruction? - __ subq(rsp, Immediate(kDoubleSize)); + __ AllocateStackSpace(kDoubleSize); frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize); unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(), kDoubleSize); __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0)); } else if (instr->InputAt(0)->IsSimd128Register()) { // TODO(titzer): use another machine instruction? 
- __ subq(rsp, Immediate(kSimd128Size)); + __ AllocateStackSpace(kSimd128Size); frame_access_state()->IncreaseSPDelta(kSimd128Size / kSystemPointerSize); unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(), @@ -2202,7 +2202,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( DCHECK(instr->InputAt(0)->IsSimd128StackSlot()); __ Movups(kScratchDoubleReg, i.InputOperand(0)); // TODO(titzer): use another machine instruction? - __ subq(rsp, Immediate(kSimd128Size)); + __ AllocateStackSpace(kSimd128Size); frame_access_state()->IncreaseSPDelta(kSimd128Size / kSystemPointerSize); unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(), @@ -3819,7 +3819,7 @@ void CodeGenerator::AssembleConstructFrame() { (kQuadWordSize / kSystemPointerSize); required_slots -= frame()->GetReturnSlotCount(); if (required_slots > 0) { - __ subq(rsp, Immediate(required_slots * kSystemPointerSize)); + __ AllocateStackSpace(required_slots * kSystemPointerSize); } } @@ -3827,7 +3827,7 @@ void CodeGenerator::AssembleConstructFrame() { const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp); const int stack_size = saves_fp_count * kQuadWordSize; // Adjust the stack pointer. - __ subq(rsp, Immediate(stack_size)); + __ AllocateStackSpace(stack_size); // Store the registers on the stack. int slot_idx = 0; for (int i = 0; i < XMMRegister::kNumRegisters; i++) { @@ -3847,7 +3847,7 @@ void CodeGenerator::AssembleConstructFrame() { // Allocate return slots (located after callee-saved). if (frame()->GetReturnSlotCount() > 0) { - __ subq(rsp, Immediate(frame()->GetReturnSlotCount() * kSystemPointerSize)); + __ AllocateStackSpace(frame()->GetReturnSlotCount() * kSystemPointerSize); } } diff --git a/src/ia32/assembler-ia32.h b/src/ia32/assembler-ia32.h index 142c29daa7..ba29da1ce5 100644 --- a/src/ia32/assembler-ia32.h +++ b/src/ia32/assembler-ia32.h @@ -752,7 +752,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { void jmp(Register reg) { jmp(Operand(reg)); } void jmp(Operand adr); void jmp(Handle code, RelocInfo::Mode rmode); - // unconditionoal jump relative to the current address. Low-level rountine, + // Unconditional jump relative to the current address. Low-level routine, // use with caution! 
void jmp_rel(int offset); diff --git a/src/ia32/deoptimizer-ia32.cc b/src/ia32/deoptimizer-ia32.cc index 34427c95ed..5d4ffeae07 100644 --- a/src/ia32/deoptimizer-ia32.cc +++ b/src/ia32/deoptimizer-ia32.cc @@ -24,7 +24,7 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm, const int kNumberOfRegisters = Register::kNumRegisters; const int kDoubleRegsSize = kDoubleSize * XMMRegister::kNumRegisters; - __ sub(esp, Immediate(kDoubleRegsSize)); + __ AllocateStackSpace(kDoubleRegsSize); const RegisterConfiguration* config = RegisterConfiguration::Default(); for (int i = 0; i < config->num_allocatable_double_registers(); ++i) { int code = config->GetAllocatableDoubleCode(i); @@ -35,7 +35,7 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm, STATIC_ASSERT(kFloatSize == kSystemPointerSize); const int kFloatRegsSize = kFloatSize * XMMRegister::kNumRegisters; - __ sub(esp, Immediate(kFloatRegsSize)); + __ AllocateStackSpace(kFloatRegsSize); for (int i = 0; i < config->num_allocatable_float_registers(); ++i) { int code = config->GetAllocatableFloatCode(i); XMMRegister xmm_reg = XMMRegister::from_code(code); diff --git a/src/ia32/macro-assembler-ia32.cc b/src/ia32/macro-assembler-ia32.cc index b599d9ff3c..7aa3a43aef 100644 --- a/src/ia32/macro-assembler-ia32.cc +++ b/src/ia32/macro-assembler-ia32.cc @@ -276,7 +276,7 @@ int TurboAssembler::PushCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1, if (fp_mode == kSaveFPRegs) { // Save all XMM registers except XMM0. int delta = kDoubleSize * (XMMRegister::kNumRegisters - 1); - sub(esp, Immediate(delta)); + AllocateStackSpace(delta); for (int i = XMMRegister::kNumRegisters - 1; i > 0; i--) { XMMRegister reg = XMMRegister::from_code(i); movsd(Operand(esp, (i - 1) * kDoubleSize), reg); @@ -798,27 +798,35 @@ void TurboAssembler::LeaveFrame(StackFrame::Type type) { } #ifdef V8_OS_WIN -void TurboAssembler::AllocateStackFrame(Register bytes_scratch) { +void TurboAssembler::AllocateStackSpace(Register bytes_scratch) { // In windows, we cannot increment the stack size by more than one page // (minimum page size is 4KB) without accessing at least one byte on the // page. Check this: // https://msdn.microsoft.com/en-us/library/aa227153(v=vs.60).aspx. - constexpr int kPageSize = 4 * 1024; Label check_offset; Label touch_next_page; jmp(&check_offset); bind(&touch_next_page); - sub(esp, Immediate(kPageSize)); + sub(esp, Immediate(kStackPageSize)); // Just to touch the page, before we increment further. 
mov(Operand(esp, 0), Immediate(0)); - sub(bytes_scratch, Immediate(kPageSize)); + sub(bytes_scratch, Immediate(kStackPageSize)); bind(&check_offset); - cmp(bytes_scratch, kPageSize); + cmp(bytes_scratch, kStackPageSize); j(greater, &touch_next_page); sub(esp, bytes_scratch); } + +void TurboAssembler::AllocateStackSpace(int bytes) { + while (bytes > kStackPageSize) { + sub(esp, Immediate(kStackPageSize)); + mov(Operand(esp, 0), Immediate(0)); + bytes -= kStackPageSize; + } + sub(esp, Immediate(bytes)); +} #endif void MacroAssembler::EnterExitFramePrologue(StackFrame::Type frame_type, @@ -861,14 +869,14 @@ void MacroAssembler::EnterExitFrameEpilogue(int argc, bool save_doubles) { if (save_doubles) { int space = XMMRegister::kNumRegisters * kDoubleSize + argc * kSystemPointerSize; - sub(esp, Immediate(space)); + AllocateStackSpace(space); const int offset = -ExitFrameConstants::kFixedFrameSizeFromFp; for (int i = 0; i < XMMRegister::kNumRegisters; i++) { XMMRegister reg = XMMRegister::from_code(i); movsd(Operand(ebp, offset - ((i + 1) * kDoubleSize)), reg); } } else { - sub(esp, Immediate(argc * kSystemPointerSize)); + AllocateStackSpace(argc * kSystemPointerSize); } // Get the required frame alignment for the OS. @@ -1631,7 +1639,7 @@ void TurboAssembler::Pextrd(Register dst, XMMRegister src, uint8_t imm8) { // We don't have an xmm scratch register, so move the data via the stack. This // path is rarely required, so it's acceptable to be slow. DCHECK_LT(imm8, 2); - sub(esp, Immediate(kDoubleSize)); + AllocateStackSpace(kDoubleSize); movsd(Operand(esp, 0), src); mov(dst, Operand(esp, imm8 * kUInt32Size)); add(esp, Immediate(kDoubleSize)); @@ -1652,7 +1660,7 @@ void TurboAssembler::Pinsrd(XMMRegister dst, Operand src, uint8_t imm8) { // We don't have an xmm scratch register, so move the data via the stack. This // path is rarely required, so it's acceptable to be slow. DCHECK_LT(imm8, 2); - sub(esp, Immediate(kDoubleSize)); + AllocateStackSpace(kDoubleSize); // Write original content of {dst} to the stack. movsd(Operand(esp, 0), dst); // Overwrite the portion specified in {imm8}. @@ -1812,12 +1820,12 @@ void TurboAssembler::PrepareCallCFunction(int num_arguments, Register scratch) { // Make stack end at alignment and make room for num_arguments words // and the original value of esp. mov(scratch, esp); - sub(esp, Immediate((num_arguments + 1) * kSystemPointerSize)); + AllocateStackSpace((num_arguments + 1) * kSystemPointerSize); DCHECK(base::bits::IsPowerOfTwo(frame_alignment)); and_(esp, -frame_alignment); mov(Operand(esp, num_arguments * kSystemPointerSize), scratch); } else { - sub(esp, Immediate(num_arguments * kSystemPointerSize)); + AllocateStackSpace(num_arguments * kSystemPointerSize); } } diff --git a/src/ia32/macro-assembler-ia32.h b/src/ia32/macro-assembler-ia32.h index 7a7591c6f0..f43be00854 100644 --- a/src/ia32/macro-assembler-ia32.h +++ b/src/ia32/macro-assembler-ia32.h @@ -40,17 +40,17 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { } void LeaveFrame(StackFrame::Type type); -// Allocate a stack frame of given size (i.e. decrement {esp} by the value -// stored in the given register). +// Allocate stack space of given size (i.e. decrement {esp} by the value +// stored in the given register, or by a constant). If you need to perform a +// stack check, do it before calling this function because this function may +// write into the newly allocated space. It may also overwrite the given +// register's value, in the version that takes a register. 
#ifdef V8_OS_WIN - // On win32, take special care if the number of bytes is greater than 4096: - // Ensure that each page within the new stack frame is touched once in - // decreasing order. See - // https://msdn.microsoft.com/en-us/library/aa227153(v=vs.60).aspx. - // Use {bytes_scratch} as scratch register for this procedure. - void AllocateStackFrame(Register bytes_scratch); + void AllocateStackSpace(Register bytes_scratch); + void AllocateStackSpace(int bytes); #else - void AllocateStackFrame(Register bytes) { sub(esp, bytes); } + void AllocateStackSpace(Register bytes) { sub(esp, bytes); } + void AllocateStackSpace(int bytes) { sub(esp, Immediate(bytes)); } #endif // Print a message to stdout and abort execution. diff --git a/src/regexp/arm/regexp-macro-assembler-arm.cc b/src/regexp/arm/regexp-macro-assembler-arm.cc index 2c89598b0d..c20e0df40b 100644 --- a/src/regexp/arm/regexp-macro-assembler-arm.cc +++ b/src/regexp/arm/regexp-macro-assembler-arm.cc @@ -671,7 +671,7 @@ Handle RegExpMacroAssemblerARM::GetCode(Handle source) { __ bind(&stack_ok); // Allocate space on stack for registers. - __ sub(sp, sp, Operand(num_registers_ * kPointerSize)); + __ AllocateStackSpace(num_registers_ * kPointerSize); // Load string end. __ ldr(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd)); // Load input start. @@ -1058,7 +1058,7 @@ void RegExpMacroAssemblerARM::CallCheckStackGuardState() { // We need to make room for the return address on the stack. int stack_alignment = base::OS::ActivationFrameAlignment(); DCHECK(IsAligned(stack_alignment, kPointerSize)); - __ sub(sp, sp, Operand(stack_alignment)); + __ AllocateStackSpace(stack_alignment); // r0 will point to the return address, placed by DirectCEntry. __ mov(r0, sp); diff --git a/src/regexp/ia32/regexp-macro-assembler-ia32.cc b/src/regexp/ia32/regexp-macro-assembler-ia32.cc index 0d6036c3a9..bff703fcda 100644 --- a/src/regexp/ia32/regexp-macro-assembler-ia32.cc +++ b/src/regexp/ia32/regexp-macro-assembler-ia32.cc @@ -709,7 +709,7 @@ Handle RegExpMacroAssemblerIA32::GetCode(Handle source) { __ mov(ebx, Operand(ebp, kStartIndex)); // Allocate space on stack for registers. - __ sub(esp, Immediate(num_registers_ * kSystemPointerSize)); + __ AllocateStackSpace(num_registers_ * kSystemPointerSize); // Load string length. __ mov(esi, Operand(ebp, kInputEnd)); // Load input position. @@ -729,18 +729,6 @@ Handle RegExpMacroAssemblerIA32::GetCode(Handle source) { // position registers. __ mov(Operand(ebp, kStringStartMinusOne), eax); -#if V8_OS_WIN - // Ensure that we write to each stack page, in order. Skipping a page - // on Windows can cause segmentation faults. Assuming page size is 4k. - const int kPageSize = 4096; - const int kRegistersPerPage = kPageSize / kSystemPointerSize; - for (int i = num_saved_registers_ + kRegistersPerPage - 1; - i < num_registers_; - i += kRegistersPerPage) { - __ mov(register_location(i), eax); // One write every page. - } -#endif // V8_OS_WIN - Label load_char_start_regexp, start_regexp; // Load newline if index is at start, previous character otherwise. __ cmp(Operand(ebp, kStartIndex), Immediate(0)); diff --git a/src/regexp/x64/regexp-macro-assembler-x64.cc b/src/regexp/x64/regexp-macro-assembler-x64.cc index a2ecd72adb..4be2d069bb 100644 --- a/src/regexp/x64/regexp-macro-assembler-x64.cc +++ b/src/regexp/x64/regexp-macro-assembler-x64.cc @@ -746,7 +746,7 @@ Handle RegExpMacroAssemblerX64::GetCode(Handle source) { __ bind(&stack_ok); // Allocate space on stack for registers. 
- __ subq(rsp, Immediate(num_registers_ * kSystemPointerSize)); + __ AllocateStackSpace(num_registers_ * kSystemPointerSize); // Load string length. __ movq(rsi, Operand(rbp, kInputEnd)); // Load input position. @@ -766,18 +766,6 @@ Handle RegExpMacroAssemblerX64::GetCode(Handle source) { // position registers. __ movq(Operand(rbp, kStringStartMinusOne), rax); -#if V8_OS_WIN - // Ensure that we have written to each stack page, in order. Skipping a page - // on Windows can cause segmentation faults. Assuming page size is 4k. - const int kPageSize = 4096; - const int kRegistersPerPage = kPageSize / kSystemPointerSize; - for (int i = num_saved_registers_ + kRegistersPerPage - 1; - i < num_registers_; - i += kRegistersPerPage) { - __ movq(register_location(i), rax); // One write every page. - } -#endif // V8_OS_WIN - // Initialize code object pointer. __ Move(code_object_pointer(), masm_.CodeObject()); diff --git a/src/turbo-assembler.h b/src/turbo-assembler.h index 5ecd41c758..3ff769a43f 100644 --- a/src/turbo-assembler.h +++ b/src/turbo-assembler.h @@ -104,6 +104,12 @@ class V8_EXPORT_PRIVATE TurboAssemblerBase : public Assembler { static bool IsAddressableThroughRootRegister( Isolate* isolate, const ExternalReference& reference); +#if V8_OS_WIN + // Minimum page size. We must touch memory once per page when expanding the + // stack, to avoid access violations. + static constexpr int kStackPageSize = 4 * KB; +#endif + protected: void RecordCommentForOffHeapTrampoline(int builtin_index); diff --git a/src/wasm/baseline/arm/liftoff-assembler-arm.h b/src/wasm/baseline/arm/liftoff-assembler-arm.h index d41d27e96a..b2cd566873 100644 --- a/src/wasm/baseline/arm/liftoff-assembler-arm.h +++ b/src/wasm/baseline/arm/liftoff-assembler-arm.h @@ -254,6 +254,28 @@ void LiftoffAssembler::PatchPrepareStackFrame(int offset, PatchingAssembler patching_assembler(AssemblerOptions{}, buffer_start_ + offset, liftoff::kPatchInstructionsRequired); +#if V8_OS_WIN + if (bytes > kStackPageSize) { + // Generate OOL code (at the end of the function, where the current + // assembler is pointing) to do the explicit stack limit check (see + // https://docs.microsoft.com/en-us/previous-versions/visualstudio/ + // visual-studio-6.0/aa227153(v=vs.60)). + // At the function start, emit a jump to that OOL code (from {offset} to + // {pc_offset()}). + int ool_offset = pc_offset() - offset; + patching_assembler.b(ool_offset - Instruction::kPcLoadDelta); + patching_assembler.PadWithNops(); + + // Now generate the OOL code. + AllocateStackSpace(bytes); + // Jump back to the start of the function (from {pc_offset()} to {offset + + // liftoff::kPatchInstructionsRequired * kInstrSize}). + int func_start_offset = + offset + liftoff::kPatchInstructionsRequired * kInstrSize - pc_offset(); + b(func_start_offset - Instruction::kPcLoadDelta); + return; + } +#endif patching_assembler.sub(sp, sp, Operand(bytes)); patching_assembler.PadWithNops(); } @@ -1381,7 +1403,7 @@ void LiftoffAssembler::CallC(wasm::FunctionSig* sig, // a pointer to them. DCHECK(IsAligned(stack_bytes, kSystemPointerSize)); // Reserve space in the stack. 
- sub(sp, sp, Operand(stack_bytes)); + AllocateStackSpace(stack_bytes); int arg_bytes = 0; for (ValueType param_type : sig->parameters()) { @@ -1467,7 +1489,7 @@ void LiftoffAssembler::CallRuntimeStub(WasmCode::RuntimeStubId sid) { } void LiftoffAssembler::AllocateStackSlot(Register addr, uint32_t size) { - sub(sp, sp, Operand(size)); + AllocateStackSpace(size); mov(addr, sp); } diff --git a/src/wasm/baseline/arm64/liftoff-assembler-arm64.h b/src/wasm/baseline/arm64/liftoff-assembler-arm64.h index ae6d5c0cf2..b1d71dce2f 100644 --- a/src/wasm/baseline/arm64/liftoff-assembler-arm64.h +++ b/src/wasm/baseline/arm64/liftoff-assembler-arm64.h @@ -150,6 +150,26 @@ void LiftoffAssembler::PatchPrepareStackFrame(int offset, #endif PatchingAssembler patching_assembler(AssemblerOptions{}, buffer_start_ + offset, 1); +#if V8_OS_WIN + if (bytes > kStackPageSize) { + // Generate OOL code (at the end of the function, where the current + // assembler is pointing) to do the explicit stack limit check (see + // https://docs.microsoft.com/en-us/previous-versions/visualstudio/ + // visual-studio-6.0/aa227153(v=vs.60)). + // At the function start, emit a jump to that OOL code (from {offset} to + // {pc_offset()}). + int ool_offset = pc_offset() - offset; + patching_assembler.b(ool_offset >> kInstrSizeLog2); + + // Now generate the OOL code. + Claim(bytes, 1); + // Jump back to the start of the function (from {pc_offset()} to {offset + + // kInstrSize}). + int func_start_offset = offset + kInstrSize - pc_offset(); + b(func_start_offset >> kInstrSizeLog2); + return; + } +#endif patching_assembler.PatchSubSp(bytes); } diff --git a/src/wasm/baseline/ia32/liftoff-assembler-ia32.h b/src/wasm/baseline/ia32/liftoff-assembler-ia32.h index 983d59277d..695db27f19 100644 --- a/src/wasm/baseline/ia32/liftoff-assembler-ia32.h +++ b/src/wasm/baseline/ia32/liftoff-assembler-ia32.h @@ -101,11 +101,11 @@ inline void push(LiftoffAssembler* assm, LiftoffRegister reg, ValueType type) { assm->push(reg.low_gp()); break; case kWasmF32: - assm->sub(esp, Immediate(sizeof(float))); + assm->AllocateStackSpace(sizeof(float)); assm->movss(Operand(esp, 0), reg.fp()); break; case kWasmF64: - assm->sub(esp, Immediate(sizeof(double))); + assm->AllocateStackSpace(sizeof(double)); assm->movsd(Operand(esp, 0), reg.fp()); break; default: @@ -171,8 +171,7 @@ void LiftoffAssembler::PatchPrepareStackFrame(int offset, AssemblerOptions{}, ExternalAssemblerBuffer(buffer_start_ + offset, kAvailableSpace)); #if V8_OS_WIN - constexpr int kPageSize = 4 * 1024; - if (bytes > kPageSize) { + if (bytes > kStackPageSize) { // Generate OOL code (at the end of the function, where the current // assembler is pointing) to do the explicit stack limit check (see // https://docs.microsoft.com/en-us/previous-versions/visualstudio/ @@ -186,10 +185,7 @@ void LiftoffAssembler::PatchPrepareStackFrame(int offset, patching_assembler.pc_offset()); // Now generate the OOL code. - // Use {edi} as scratch register; it is not being used as parameter - // register (see wasm-linkage.h). - mov(edi, bytes); - AllocateStackFrame(edi); + AllocateStackSpace(bytes); // Jump back to the start of the function (from {pc_offset()} to {offset + // kSubSpSize}). int func_start_offset = offset + liftoff::kSubSpSize - pc_offset(); @@ -1459,7 +1455,7 @@ bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode, return true; case kExprI64ReinterpretF64: // Push src to the stack. - sub(esp, Immediate(8)); + AllocateStackSpace(8); movsd(Operand(esp, 0), src.fp()); // Pop to dst. 
pop(dst.low_gp()); @@ -1709,7 +1705,7 @@ void LiftoffAssembler::PushRegisters(LiftoffRegList regs) { LiftoffRegList fp_regs = regs & kFpCacheRegList; unsigned num_fp_regs = fp_regs.GetNumRegsSet(); if (num_fp_regs) { - sub(esp, Immediate(num_fp_regs * kStackSlotSize)); + AllocateStackSpace(num_fp_regs * kStackSlotSize); unsigned offset = 0; while (!fp_regs.is_empty()) { LiftoffRegister reg = fp_regs.GetFirstRegSet(); @@ -1750,7 +1746,7 @@ void LiftoffAssembler::CallC(wasm::FunctionSig* sig, const LiftoffRegister* rets, ValueType out_argument_type, int stack_bytes, ExternalReference ext_ref) { - sub(esp, Immediate(stack_bytes)); + AllocateStackSpace(stack_bytes); int arg_bytes = 0; for (ValueType param_type : sig->parameters()) { @@ -1815,7 +1811,7 @@ void LiftoffAssembler::CallRuntimeStub(WasmCode::RuntimeStubId sid) { } void LiftoffAssembler::AllocateStackSlot(Register addr, uint32_t size) { - sub(esp, Immediate(size)); + AllocateStackSpace(size); mov(addr, esp); } diff --git a/src/wasm/baseline/x64/liftoff-assembler-x64.h b/src/wasm/baseline/x64/liftoff-assembler-x64.h index 5055bdae4d..98f69e8ef5 100644 --- a/src/wasm/baseline/x64/liftoff-assembler-x64.h +++ b/src/wasm/baseline/x64/liftoff-assembler-x64.h @@ -112,11 +112,11 @@ inline void push(LiftoffAssembler* assm, LiftoffRegister reg, ValueType type) { assm->pushq(reg.gp()); break; case kWasmF32: - assm->subq(rsp, Immediate(kSystemPointerSize)); + assm->AllocateStackSpace(kSystemPointerSize); assm->Movss(Operand(rsp, 0), reg.fp()); break; case kWasmF64: - assm->subq(rsp, Immediate(kSystemPointerSize)); + assm->AllocateStackSpace(kSystemPointerSize); assm->Movsd(Operand(rsp, 0), reg.fp()); break; default: @@ -131,11 +131,14 @@ inline void SpillRegisters(LiftoffAssembler* assm, Regs... regs) { } } +constexpr int kSubSpSize = 7; // 7 bytes for "subq rsp, " + } // namespace liftoff int LiftoffAssembler::PrepareStackFrame() { int offset = pc_offset(); sub_sp_32(0); + DCHECK_EQ(liftoff::kSubSpSize, pc_offset() - offset); return offset; } @@ -149,7 +152,30 @@ void LiftoffAssembler::PatchPrepareStackFrame(int offset, Assembler patching_assembler( AssemblerOptions{}, ExternalAssemblerBuffer(buffer_start_ + offset, kAvailableSpace)); +#if V8_OS_WIN + if (bytes > kStackPageSize) { + // Generate OOL code (at the end of the function, where the current + // assembler is pointing) to do the explicit stack limit check (see + // https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-6.0/aa227153(v=vs.60)). + // At the function start, emit a jump to that OOL code (from {offset} to + // {pc_offset()}). + int ool_offset = pc_offset() - offset; + patching_assembler.jmp_rel(ool_offset); + DCHECK_GE(liftoff::kSubSpSize, patching_assembler.pc_offset()); + patching_assembler.Nop(liftoff::kSubSpSize - + patching_assembler.pc_offset()); + + // Now generate the OOL code. + AllocateStackSpace(bytes); + // Jump back to the start of the function (from {pc_offset()} to {offset + + // kSubSpSize}). 
+ int func_start_offset = offset + liftoff::kSubSpSize - pc_offset(); + jmp_rel(func_start_offset); + return; + } +#endif patching_assembler.sub_sp_32(bytes); + DCHECK_EQ(liftoff::kSubSpSize, patching_assembler.pc_offset()); } void LiftoffAssembler::FinishCode() {} @@ -1493,7 +1519,7 @@ void LiftoffAssembler::PushRegisters(LiftoffRegList regs) { LiftoffRegList fp_regs = regs & kFpCacheRegList; unsigned num_fp_regs = fp_regs.GetNumRegsSet(); if (num_fp_regs) { - subq(rsp, Immediate(num_fp_regs * kStackSlotSize)); + AllocateStackSpace(num_fp_regs * kStackSlotSize); unsigned offset = 0; while (!fp_regs.is_empty()) { LiftoffRegister reg = fp_regs.GetFirstRegSet(); @@ -1534,7 +1560,7 @@ void LiftoffAssembler::CallC(wasm::FunctionSig* sig, const LiftoffRegister* rets, ValueType out_argument_type, int stack_bytes, ExternalReference ext_ref) { - subq(rsp, Immediate(stack_bytes)); + AllocateStackSpace(stack_bytes); int arg_bytes = 0; for (ValueType param_type : sig->parameters()) { @@ -1596,7 +1622,7 @@ void LiftoffAssembler::CallRuntimeStub(WasmCode::RuntimeStubId sid) { } void LiftoffAssembler::AllocateStackSlot(Register addr, uint32_t size) { - subq(rsp, Immediate(size)); + AllocateStackSpace(size); movq(addr, rsp); } diff --git a/src/x64/assembler-x64.cc b/src/x64/assembler-x64.cc index ecddede53c..be7d93ab03 100644 --- a/src/x64/assembler-x64.cc +++ b/src/x64/assembler-x64.cc @@ -1566,24 +1566,33 @@ void Assembler::j(Condition cc, emitl(code_target_index); } - -void Assembler::jmp(Label* L, Label::Distance distance) { - EnsureSpace ensure_space(this); +void Assembler::jmp_rel(int offset) { const int short_size = sizeof(int8_t); const int long_size = sizeof(int32_t); + --offset; // This is how jumps are specified on x64. + if (is_int8(offset - short_size) && !predictable_code_size()) { + // 1110 1011 #8-bit disp. + emit(0xEB); + emit((offset - short_size) & 0xFF); + } else { + // 1110 1001 #32-bit disp. + emit(0xE9); + emitl(offset - long_size); + } +} + +void Assembler::jmp(Label* L, Label::Distance distance) { + const int long_size = sizeof(int32_t); + if (L->is_bound()) { - int offs = L->pos() - pc_offset() - 1; - DCHECK_LE(offs, 0); - if (is_int8(offs - short_size) && !predictable_code_size()) { - // 1110 1011 #8-bit disp. - emit(0xEB); - emit((offs - short_size) & 0xFF); - } else { - // 1110 1001 #32-bit disp. - emit(0xE9); - emitl(offs - long_size); - } - } else if (distance == Label::kNear) { + int offset = L->pos() - pc_offset(); + DCHECK_LE(offset, 0); // backward jump. + jmp_rel(offset); + return; + } + + EnsureSpace ensure_space(this); + if (distance == Label::kNear) { emit(0xEB); byte disp = 0x00; if (L->is_near_linked()) { @@ -1622,7 +1631,6 @@ void Assembler::jmp(Label* L, Label::Distance distance) { } } - void Assembler::jmp(Handle target, RelocInfo::Mode rmode) { DCHECK(RelocInfo::IsCodeTarget(rmode)); EnsureSpace ensure_space(this); diff --git a/src/x64/assembler-x64.h b/src/x64/assembler-x64.h index d58292248a..3f89e47e93 100644 --- a/src/x64/assembler-x64.h +++ b/src/x64/assembler-x64.h @@ -764,6 +764,10 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { void jmp(Register adr); void jmp(Operand src); + // Unconditional jump relative to the current address. Low-level routine, + // use with caution! 
+ void jmp_rel(int offset); + // Conditional jumps void j(Condition cc, Label* L, diff --git a/src/x64/deoptimizer-x64.cc b/src/x64/deoptimizer-x64.cc index f49a131a26..3bfeb659c7 100644 --- a/src/x64/deoptimizer-x64.cc +++ b/src/x64/deoptimizer-x64.cc @@ -24,7 +24,7 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm, const int kNumberOfRegisters = Register::kNumRegisters; const int kDoubleRegsSize = kDoubleSize * XMMRegister::kNumRegisters; - __ subq(rsp, Immediate(kDoubleRegsSize)); + __ AllocateStackSpace(kDoubleRegsSize); const RegisterConfiguration* config = RegisterConfiguration::Default(); for (int i = 0; i < config->num_allocatable_double_registers(); ++i) { @@ -35,7 +35,7 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm, } const int kFloatRegsSize = kFloatSize * XMMRegister::kNumRegisters; - __ subq(rsp, Immediate(kFloatRegsSize)); + __ AllocateStackSpace(kFloatRegsSize); for (int i = 0; i < config->num_allocatable_float_registers(); ++i) { int code = config->GetAllocatableFloatCode(i); diff --git a/src/x64/macro-assembler-x64.cc b/src/x64/macro-assembler-x64.cc index 29b1315d01..81809620e7 100644 --- a/src/x64/macro-assembler-x64.cc +++ b/src/x64/macro-assembler-x64.cc @@ -723,7 +723,7 @@ int TurboAssembler::PushCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1, // R12 to r15 are callee save on all platforms. if (fp_mode == kSaveFPRegs) { int delta = kDoubleSize * XMMRegister::kNumRegisters; - subq(rsp, Immediate(delta)); + AllocateStackSpace(delta); for (int i = 0; i < XMMRegister::kNumRegisters; i++) { XMMRegister reg = XMMRegister::from_code(i); Movsd(Operand(rsp, i * kDoubleSize), reg); @@ -2480,6 +2480,38 @@ void TurboAssembler::LeaveFrame(StackFrame::Type type) { popq(rbp); } +#ifdef V8_OS_WIN +void TurboAssembler::AllocateStackSpace(Register bytes_scratch) { + // On Windows, we cannot grow the stack by more than one page at a time + // (the minimum page size is 4 KB) without touching at least one byte on + // each page along the way. See: + // https://msdn.microsoft.com/en-us/library/aa227153(v=vs.60).aspx. + Label check_offset; + Label touch_next_page; + jmp(&check_offset); + bind(&touch_next_page); + subq(rsp, Immediate(kStackPageSize)); + // Just to touch the page, before we increment further.
+ movb(Operand(rsp, 0), Immediate(0)); + subq(bytes_scratch, Immediate(kStackPageSize)); + + bind(&check_offset); + cmpq(bytes_scratch, Immediate(kStackPageSize)); + j(greater, &touch_next_page); + + subq(rsp, bytes_scratch); +} + +void TurboAssembler::AllocateStackSpace(int bytes) { + while (bytes > kStackPageSize) { + subq(rsp, Immediate(kStackPageSize)); + movb(Operand(rsp, 0), Immediate(0)); + bytes -= kStackPageSize; + } + subq(rsp, Immediate(bytes)); +} +#endif + void MacroAssembler::EnterExitFramePrologue(bool save_rax, StackFrame::Type frame_type) { DCHECK(frame_type == StackFrame::EXIT || @@ -2525,7 +2557,7 @@ void MacroAssembler::EnterExitFrameEpilogue(int arg_stack_space, if (save_doubles) { int space = XMMRegister::kNumRegisters * kDoubleSize + arg_stack_space * kSystemPointerSize; - subq(rsp, Immediate(space)); + AllocateStackSpace(space); int offset = -ExitFrameConstants::kFixedFrameSizeFromFp; const RegisterConfiguration* config = RegisterConfiguration::Default(); for (int i = 0; i < config->num_allocatable_double_registers(); ++i) { @@ -2534,7 +2566,7 @@ void MacroAssembler::EnterExitFrameEpilogue(int arg_stack_space, Movsd(Operand(rbp, offset - ((i + 1) * kDoubleSize)), reg); } } else if (arg_stack_space > 0) { - subq(rsp, Immediate(arg_stack_space * kSystemPointerSize)); + AllocateStackSpace(arg_stack_space * kSystemPointerSize); } // Get the required frame alignment for the OS. @@ -2665,7 +2697,7 @@ void TurboAssembler::PrepareCallCFunction(int num_arguments) { DCHECK(base::bits::IsPowerOfTwo(frame_alignment)); int argument_slots_on_stack = ArgumentStackSlotsForCFunctionCall(num_arguments); - subq(rsp, Immediate((argument_slots_on_stack + 1) * kSystemPointerSize)); + AllocateStackSpace((argument_slots_on_stack + 1) * kSystemPointerSize); andq(rsp, Immediate(-frame_alignment)); movq(Operand(rsp, argument_slots_on_stack * kSystemPointerSize), kScratchRegister); diff --git a/src/x64/macro-assembler-x64.h b/src/x64/macro-assembler-x64.h index aed7aed363..20ad209a7f 100644 --- a/src/x64/macro-assembler-x64.h +++ b/src/x64/macro-assembler-x64.h @@ -416,6 +416,19 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { } void LeaveFrame(StackFrame::Type type); +// Allocate stack space of given size (i.e. decrement {rsp} by the value +// stored in the given register, or by a constant). If you need to perform a +// stack check, do it before calling this function because this function may +// write into the newly allocated space. It may also overwrite the given +// register's value, in the version that takes a register. +#ifdef V8_OS_WIN + void AllocateStackSpace(Register bytes_scratch); + void AllocateStackSpace(int bytes); +#else + void AllocateStackSpace(Register bytes) { subq(rsp, bytes); } + void AllocateStackSpace(int bytes) { subq(rsp, Immediate(bytes)); } +#endif + // Removes current frame and its arguments from the stack preserving the // arguments and a return address pushed to the stack for the next call. Both // |callee_args_count| and |caller_args_count_reg| do not include receiver. 
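The two AllocateStackSpace variants added above differ only in whether the allocation size is known at assembly time; both keep successive stack accesses at most one page apart, so the Windows guard page is always touched before the stack pointer moves past it. The standalone C++ sketch below is not part of the patch: ProbeOffsets is a hypothetical helper and kStackPageSize is a local stand-in for V8's constant. It models the probe sequence that the constant-size variant emits.

#include <cstdio>
#include <vector>

namespace {

constexpr int kStackPageSize = 4096;  // stand-in for V8's guard-page granularity

// Returns the offsets below the original stack pointer at which the emitted
// code would touch memory before the final stack-pointer adjustment, mirroring
// the loop in TurboAssembler::AllocateStackSpace(int bytes).
std::vector<int> ProbeOffsets(int bytes) {
  std::vector<int> touched;
  int moved = 0;
  while (bytes > kStackPageSize) {
    moved += kStackPageSize;    // subq(rsp, Immediate(kStackPageSize))
    touched.push_back(moved);   // movb(Operand(rsp, 0), Immediate(0))
    bytes -= kStackPageSize;
  }
  // Final subq(rsp, Immediate(bytes)): no probe needed, since the remaining
  // adjustment is at most one page beyond the last touched address.
  return touched;
}

}  // namespace

int main() {
  // A five-page (20 KB) frame produces four intermediate probes, one per page
  // boundary crossed before the final adjustment.
  for (int offset : ProbeOffsets(5 * kStackPageSize)) {
    std::printf("probe at rsp-%d\n", offset);
  }
  return 0;
}

The register variant follows the same pattern, but because the size is only known at run time it is expressed as a compare-and-branch loop over bytes_scratch rather than being unrolled at assembly time.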
diff --git a/test/cctest/test-assembler-arm64.cc b/test/cctest/test-assembler-arm64.cc index 8155508480..2247d8563e 100644 --- a/test/cctest/test-assembler-arm64.cc +++ b/test/cctest/test-assembler-arm64.cc @@ -6984,10 +6984,6 @@ TEST(claim_drop_zero) { __ Drop(xzr, 0); __ Claim(x7, 0); __ Drop(x7, 0); - __ ClaimBySMI(xzr, 8); - __ DropBySMI(xzr, 8); - __ ClaimBySMI(xzr, 0); - __ DropBySMI(xzr, 0); CHECK_EQ(0u, __ SizeOfCodeGeneratedSince(&start)); END(); diff --git a/test/cctest/test-assembler-ia32.cc b/test/cctest/test-assembler-ia32.cc index 291be9f61d..9ac625ffe3 100644 --- a/test/cctest/test-assembler-ia32.cc +++ b/test/cctest/test-assembler-ia32.cc @@ -590,7 +590,7 @@ TEST(AssemblerX64FMA_sd) { __ mulsd(xmm3, xmm1); __ addsd(xmm3, xmm2); // Expected result in xmm3 - __ sub(esp, Immediate(kDoubleSize)); // For memory operand + __ AllocateStackSpace(kDoubleSize); // For memory operand // vfmadd132sd __ mov(eax, Immediate(1)); // Test number __ movaps(xmm4, xmm0); @@ -818,7 +818,7 @@ TEST(AssemblerX64FMA_ss) { __ mulss(xmm3, xmm1); __ addss(xmm3, xmm2); // Expected result in xmm3 - __ sub(esp, Immediate(kDoubleSize)); // For memory operand + __ AllocateStackSpace(kDoubleSize); // For memory operand // vfmadd132ss __ mov(eax, Immediate(1)); // Test number __ movaps(xmm4, xmm0); diff --git a/test/cctest/test-assembler-x64.cc b/test/cctest/test-assembler-x64.cc index 61d24187c0..3ba3ea7cb3 100644 --- a/test/cctest/test-assembler-x64.cc +++ b/test/cctest/test-assembler-x64.cc @@ -958,7 +958,7 @@ TEST(AssemblerX64FMA_sd) { __ mulsd(xmm3, xmm1); __ addsd(xmm3, xmm2); // Expected result in xmm3 - __ subq(rsp, Immediate(kDoubleSize)); // For memory operand + __ AllocateStackSpace(kDoubleSize); // For memory operand // vfmadd132sd __ movl(rax, Immediate(1)); // Test number __ movaps(xmm8, xmm0); @@ -1183,7 +1183,7 @@ TEST(AssemblerX64FMA_ss) { __ mulss(xmm3, xmm1); __ addss(xmm3, xmm2); // Expected result in xmm3 - __ subq(rsp, Immediate(kDoubleSize)); // For memory operand + __ AllocateStackSpace(kDoubleSize); // For memory operand // vfmadd132ss __ movl(rax, Immediate(1)); // Test number __ movaps(xmm8, xmm0); @@ -2484,7 +2484,7 @@ TEST(AssemblerX64vmovups) { __ shufps(xmm0, xmm0, 0x0); // brocast first argument __ shufps(xmm1, xmm1, 0x0); // brocast second argument // copy xmm1 to xmm0 through the stack to test the "vmovups reg, mem". 
- __ subq(rsp, Immediate(kSimd128Size)); + __ AllocateStackSpace(kSimd128Size); __ vmovups(Operand(rsp, 0), xmm1); __ vmovups(xmm0, Operand(rsp, 0)); __ addq(rsp, Immediate(kSimd128Size)); diff --git a/test/cctest/test-macro-assembler-x64.cc b/test/cctest/test-macro-assembler-x64.cc index f08e1fabb7..5b2b4b2203 100644 --- a/test/cctest/test-macro-assembler-x64.cc +++ b/test/cctest/test-macro-assembler-x64.cc @@ -865,7 +865,7 @@ TEST(OperandOffset) { void TestFloat32x4Abs(MacroAssembler* masm, Label* exit, float x, float y, float z, float w) { - __ subq(rsp, Immediate(kSimd128Size)); + __ AllocateStackSpace(kSimd128Size); __ Move(xmm1, x); __ Movss(Operand(rsp, 0 * kFloatSize), xmm1); @@ -902,7 +902,7 @@ void TestFloat32x4Abs(MacroAssembler* masm, Label* exit, float x, float y, void TestFloat32x4Neg(MacroAssembler* masm, Label* exit, float x, float y, float z, float w) { - __ subq(rsp, Immediate(kSimd128Size)); + __ AllocateStackSpace(kSimd128Size); __ Move(xmm1, x); __ Movss(Operand(rsp, 0 * kFloatSize), xmm1); @@ -938,7 +938,7 @@ void TestFloat32x4Neg(MacroAssembler* masm, Label* exit, float x, float y, } void TestFloat64x2Abs(MacroAssembler* masm, Label* exit, double x, double y) { - __ subq(rsp, Immediate(kSimd128Size)); + __ AllocateStackSpace(kSimd128Size); __ Move(xmm1, x); __ Movsd(Operand(rsp, 0 * kDoubleSize), xmm1); @@ -962,7 +962,7 @@ void TestFloat64x2Abs(MacroAssembler* masm, Label* exit, double x, double y) { } void TestFloat64x2Neg(MacroAssembler* masm, Label* exit, double x, double y) { - __ subq(rsp, Immediate(kSimd128Size)); + __ AllocateStackSpace(kSimd128Size); __ Move(xmm1, x); __ Movsd(Operand(rsp, 0 * kDoubleSize), xmm1); diff --git a/test/mjsunit/compiler/regress-9017.js b/test/mjsunit/compiler/regress-9017.js new file mode 100644 index 0000000000..7cbd4e0178 --- /dev/null +++ b/test/mjsunit/compiler/regress-9017.js @@ -0,0 +1,39 @@ +// Copyright 2019 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Flags: --allow-natives-syntax --noturbo-inlining --noturbo-verify-allocation + +// Ensure that very large stack frames can be used successfully. +// The flag --noturbo-verify-allocation is to make this run a little faster; it +// shouldn't affect the behavior. + +const frame_size = 4096 * 4; // 4 pages +const num_locals = frame_size / 8; // Assume 8-byte floating point values + +function f() { return 0.1; } + +// Function g, on positive inputs, will call itself recursively. On negative +// inputs, it does a computation that requires a large number of locals. +// The flag --noturbo-inlining is important to keep the compiler from realizing +// that all of this work is for nothing. +let g_text = "if (input === 0) return; if (input > 0) return g(input - 1);"; +g_text += " var inc = f(); var a0 = 0;"; +for (let i = 1; i < num_locals; ++i) { + g_text += " var a" + i + " = a" + (i - 1) + " + inc;"; +} +g_text += " return f(a0"; +for (let i = 1; i < num_locals; ++i) { + g_text += ", a" + i; +} +g_text += ");"; +const g = new Function("input", g_text); + +%PrepareFunctionForOptimization(g); +g(1); +g(-1); +%OptimizeFunctionOnNextCall(g); + +// Use recursion to get past whatever stack space is already committed. +// 20 * 16kB = 320kB, comfortably below the default 1MB stack reservation limit. 
+g(20); diff --git a/test/mjsunit/mjsunit.status b/test/mjsunit/mjsunit.status index a771b1e867..2361128b0b 100644 --- a/test/mjsunit/mjsunit.status +++ b/test/mjsunit/mjsunit.status @@ -109,6 +109,7 @@ 'generated-transition-stub': [PASS, ['mode == debug', SKIP]], 'migrations': [SKIP], 'array-functions-prototype-misc': [PASS, SLOW, ['mode == debug', SKIP]], + 'compiler/regress-9017': [PASS, SLOW], 'compiler/regress-808472': [PASS, ['mode == debug', SKIP]], 'es6/promise-all-overflow-1': [SKIP], 'es6/promise-all-overflow-2': [PASS, SLOW, ['mode == debug or arch != x64', SKIP]], diff --git a/test/mjsunit/regress/regress-9017.js b/test/mjsunit/regress/regress-9017.js new file mode 100644 index 0000000000..1b9b9e7101 --- /dev/null +++ b/test/mjsunit/regress/regress-9017.js @@ -0,0 +1,15 @@ +// Copyright 2019 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Call a recursive function that uses large numbers of bound arguments. If we +// are failing to touch consecutive guard pages on Windows when extending the +// stack for bound arguments, then this would crash. + +const frameSize = 4096 * 5; +const numValues = frameSize / 4; +const arr = new Array(numValues); +let counter = 10; +function f() { --counter; return 1 + (counter > 0 ? bound() : 0); } +const bound = f.bind.apply(f, arr); +bound(); diff --git a/test/mjsunit/regress/wasm/regress-9017.js b/test/mjsunit/regress/wasm/regress-9017.js new file mode 100644 index 0000000000..7a8930a146 --- /dev/null +++ b/test/mjsunit/regress/wasm/regress-9017.js @@ -0,0 +1,38 @@ +// Copyright 2019 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Flags: --liftoff --nowasm-tier-up +// +// This test is intended to make Liftoff generate code that uses a very large +// stack frame, and then try to call another function (which would write to the +// stack pointer location). On Windows, large frames need extra code to touch +// every page in order, because the OS only leaves a small guard area for the +// stack, and trying to access past that area, even into memory that was +// intentionally reserved for this thread's stack, will crash the program. + +load('test/mjsunit/wasm/wasm-module-builder.js'); + +var builder = new WasmModuleBuilder(); + +var func_idx = builder.addFunction('helper', kSig_i_v) + .addLocals({i32_count: 1}) + .addBody([ + kExprI32Const, 0x01, + ]).index; + +var large_function_body = []; +const num_temporaries = 16 * 1024; +for (let i = 0; i < num_temporaries; ++i) { + large_function_body.push(kExprCallFunction, func_idx); +} +for (let i = 1; i < num_temporaries; ++i) { + large_function_body.push(kExprI32Add); +} + +builder.addFunction('test', kSig_i_v) + .addBody(large_function_body) + .exportFunc(); +var module = builder.instantiate(); + +assertEquals(num_temporaries, module.exports.test());
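The Liftoff test above is meant to push PatchPrepareStackFrame down the new out-of-line path on Windows x64, where the 7-byte slot reserved by sub_sp_32(0) ends up holding either the usual "subq rsp, imm32" or a "jmp rel32" to out-of-line probing code followed by nop padding. The standalone C++ sketch below is not part of the patch; EncodePatchSlot is a hypothetical helper that only illustrates the two shapes the slot can take, using the standard x64 encodings.

#include <cstdint>
#include <cstdio>
#include <vector>

constexpr int kSubSpSize = 7;  // matches liftoff::kSubSpSize in the patch

// Hypothetical helper: produce the kSubSpSize bytes held by the patch slot.
// If ool_offset < 0, emit "subq rsp, imm32" (48 81 EC id); otherwise emit
// "jmp rel32" (E9 cd) to the out-of-line code plus nop padding, the same shape
// PatchPrepareStackFrame produces via jmp_rel() and Nop().
std::vector<uint8_t> EncodePatchSlot(int32_t frame_bytes, int32_t ool_offset) {
  std::vector<uint8_t> code;
  if (ool_offset < 0) {
    code = {0x48, 0x81, 0xEC};  // REX.W sub rsp, imm32
    for (int i = 0; i < 4; ++i) code.push_back((frame_bytes >> (8 * i)) & 0xFF);
  } else {
    code.push_back(0xE9);  // jmp rel32
    // As in Assembler::jmp_rel, the stored displacement is relative to the end
    // of the 5-byte instruction, so subtract the instruction length.
    int32_t disp = ool_offset - 5;
    for (int i = 0; i < 4; ++i) code.push_back((disp >> (8 * i)) & 0xFF);
    while (static_cast<int>(code.size()) < kSubSpSize) code.push_back(0x90);  // nop
  }
  return code;
}

void Print(const char* label, const std::vector<uint8_t>& code) {
  std::printf("%s:", label);
  for (uint8_t b : code) std::printf(" %02x", b);
  std::printf("\n");
}

int main() {
  // Small frame: the slot keeps the plain stack-pointer adjustment.
  Print("subq rsp, 0x80 ", EncodePatchSlot(0x80, -1));
  // Large frame on Windows: the slot becomes a jump to out-of-line probing
  // code located ool_offset bytes past the start of the slot.
  Print("jmp to OOL code", EncodePatchSlot(5 * 4096, 0x1234));
  return 0;
}

On other platforms, or for frames no larger than a page, the slot still receives the plain stack-pointer adjustment, and the DCHECKs on pc_offset() guard that neither shape overruns the kSubSpSize slot.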