From f9e76d6dfff6cbf89a2bd896e0fdad8108b12d48 Mon Sep 17 00:00:00 2001
From: Bill Budge
Date: Tue, 22 Dec 2020 12:58:22 -0800
Subject: [PATCH] [codegen] Handle alignment holes when pushing arguments

- Modify InstructionSelectors to track both padding and multiple slot
  values to correctly adjust stack pointers when pushing arguments.
  Pass stack offset as an immediate operand.
- Modify CodeGenerators to handle alignment padding.

Bug: v8:9198
Change-Id: I1c132284e07b5f5e73ce570a641f17decdfba504
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2596027
Reviewed-by: Georg Neis
Reviewed-by: Andreas Haas
Commit-Queue: Bill Budge
Cr-Commit-Position: refs/heads/master@{#72049}
---
 .../backend/arm/code-generator-arm.cc         | 36 +++++--
 .../backend/arm/instruction-selector-arm.cc   |  6 +-
 .../arm64/instruction-selector-arm64.cc       | 13 ++-
 .../backend/ia32/code-generator-ia32.cc       | 97 +++++++++++--------
 .../backend/ia32/instruction-selector-ia32.cc | 21 ++--
 .../backend/x64/code-generator-x64.cc         | 88 ++++++++---------
 .../backend/x64/instruction-selector-x64.cc   | 20 ++--
 7 files changed, 161 insertions(+), 120 deletions(-)

diff --git a/src/compiler/backend/arm/code-generator-arm.cc b/src/compiler/backend/arm/code-generator-arm.cc
index 74215cac30..1121eb8095 100644
--- a/src/compiler/backend/arm/code-generator-arm.cc
+++ b/src/compiler/backend/arm/code-generator-arm.cc
@@ -1795,22 +1795,38 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ VFPCanonicalizeNaN(result, value);
       break;
     }
-    case kArmPush:
-      if (instr->InputAt(0)->IsFPRegister()) {
-        LocationOperand* op = LocationOperand::cast(instr->InputAt(0));
+    case kArmPush: {
+      int stack_decrement = i.InputInt32(0);
+      if (instr->InputAt(1)->IsFPRegister()) {
+        LocationOperand* op = LocationOperand::cast(instr->InputAt(1));
         switch (op->representation()) {
           case MachineRepresentation::kFloat32:
-            __ vpush(i.InputFloatRegister(0));
+            // 1 slot values are never padded.
+            DCHECK_EQ(stack_decrement, kSystemPointerSize);
+            __ vpush(i.InputFloatRegister(1));
             frame_access_state()->IncreaseSPDelta(1);
             break;
           case MachineRepresentation::kFloat64:
-            __ vpush(i.InputDoubleRegister(0));
-            frame_access_state()->IncreaseSPDelta(kDoubleSize /
+            // 2 slot values have up to 1 slot of padding.
+            DCHECK_GE(stack_decrement, kDoubleSize);
+            if (stack_decrement > kDoubleSize) {
+              DCHECK_EQ(stack_decrement, kDoubleSize + kSystemPointerSize);
+              __ AllocateStackSpace(kSystemPointerSize);
+            }
+            __ vpush(i.InputDoubleRegister(1));
+            frame_access_state()->IncreaseSPDelta(stack_decrement /
                                                   kSystemPointerSize);
             break;
           case MachineRepresentation::kSimd128: {
-            __ vpush(i.InputSimd128Register(0));
-            frame_access_state()->IncreaseSPDelta(kSimd128Size /
+            // 4 slot values have up to 3 slots of padding.
+            DCHECK_GE(stack_decrement, kSimd128Size);
+            if (stack_decrement > kSimd128Size) {
+              int padding = stack_decrement - kSimd128Size;
+              DCHECK_LT(padding, kSimd128Size);
+              __ AllocateStackSpace(padding);
+            }
+            __ vpush(i.InputSimd128Register(1));
+            frame_access_state()->IncreaseSPDelta(stack_decrement /
                                                   kSystemPointerSize);
             break;
           }
@@ -1819,11 +1835,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
             break;
         }
       } else {
-        __ push(i.InputRegister(0));
+        DCHECK_EQ(stack_decrement, kSystemPointerSize);
+        __ push(i.InputRegister(1));
         frame_access_state()->IncreaseSPDelta(1);
       }
       DCHECK_EQ(LeaveCC, i.OutputSBit());
       break;
+    }
     case kArmPoke: {
       int const slot = MiscField::decode(instr->opcode());
       __ str(i.InputRegister(0), MemOperand(sp, slot * kSystemPointerSize));
diff --git a/src/compiler/backend/arm/instruction-selector-arm.cc b/src/compiler/backend/arm/instruction-selector-arm.cc
index bd1e7c4b4f..6b287f7725 100644
--- a/src/compiler/backend/arm/instruction-selector-arm.cc
+++ b/src/compiler/backend/arm/instruction-selector-arm.cc
@@ -1741,10 +1741,14 @@ void InstructionSelector::EmitPrepareArguments(
     }
   } else {
     // Push any stack arguments.
+    int stack_decrement = 0;
     for (PushParameter input : base::Reversed(*arguments)) {
+      stack_decrement += kSystemPointerSize;
       // Skip any alignment holes in pushed nodes.
       if (input.node == nullptr) continue;
-      Emit(kArmPush, g.NoOutput(), g.UseRegister(input.node));
+      InstructionOperand decrement = g.UseImmediate(stack_decrement);
+      stack_decrement = 0;
+      Emit(kArmPush, g.NoOutput(), decrement, g.UseRegister(input.node));
     }
   }
 }
diff --git a/src/compiler/backend/arm64/instruction-selector-arm64.cc b/src/compiler/backend/arm64/instruction-selector-arm64.cc
index 0f432f3bc1..1aae505075 100644
--- a/src/compiler/backend/arm64/instruction-selector-arm64.cc
+++ b/src/compiler/backend/arm64/instruction-selector-arm64.cc
@@ -2091,21 +2091,24 @@ void InstructionSelector::EmitPrepareArguments(
   // Poke the arguments into the stack.
   while (slot >= 0) {
     PushParameter input0 = (*arguments)[slot];
+    // Skip holes in the param array. These represent both extra slots for
+    // multi-slot values and padding slots for alignment.
+    if (input0.node == nullptr) {
+      slot--;
+      continue;
+    }
     PushParameter input1 = slot > 0 ? (*arguments)[slot - 1] : PushParameter();
     // Emit a poke-pair if consecutive parameters have the same type.
     // TODO(arm): Support consecutive Simd128 parameters.
-    if (input0.node != nullptr && input1.node != nullptr &&
+    if (input1.node != nullptr &&
         input0.location.GetType() == input1.location.GetType()) {
       Emit(kArm64PokePair, g.NoOutput(), g.UseRegister(input0.node),
            g.UseRegister(input1.node), g.TempImmediate(slot));
       slot -= 2;
-    } else if (input0.node != nullptr) {
+    } else {
       Emit(kArm64Poke, g.NoOutput(), g.UseRegister(input0.node),
            g.TempImmediate(slot));
       slot--;
-    } else {
-      // Skip any alignment holes in pushed nodes.
-      slot--;
     }
   }
 }
diff --git a/src/compiler/backend/ia32/code-generator-ia32.cc b/src/compiler/backend/ia32/code-generator-ia32.cc
index 45a2c59597..1f0250a7da 100644
--- a/src/compiler/backend/ia32/code-generator-ia32.cc
+++ b/src/compiler/backend/ia32/code-generator-ia32.cc
@@ -1824,69 +1824,80 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       }
       break;
     }
-    case kIA32PushFloat32:
-      if (instr->InputAt(0)->IsFPRegister()) {
+    case kIA32PushFloat32: {
+      // 1 slot values are never padded.
+      DCHECK_EQ(i.InputInt32(0), kFloatSize);
+      if (instr->InputAt(1)->IsFPRegister()) {
         __ AllocateStackSpace(kFloatSize);
-        __ Movss(Operand(esp, 0), i.InputDoubleRegister(0));
-        frame_access_state()->IncreaseSPDelta(kFloatSize / kSystemPointerSize);
-      } else if (HasImmediateInput(instr, 0)) {
-        __ Move(kScratchDoubleReg, i.InputFloat32(0));
+        __ Movss(Operand(esp, 0), i.InputDoubleRegister(1));
+      } else if (HasImmediateInput(instr, 1)) {
+        __ AllocateStackSpace(kFloatSize);
+        __ Move(kScratchDoubleReg, i.InputFloat32(1));
+        __ Movss(Operand(esp, 0), kScratchDoubleReg);
+      } else {
+        __ Movss(kScratchDoubleReg, i.InputOperand(1));
         __ AllocateStackSpace(kFloatSize);
         __ Movss(Operand(esp, 0), kScratchDoubleReg);
-        frame_access_state()->IncreaseSPDelta(kFloatSize / kSystemPointerSize);
-      } else {
-        __ Movss(kScratchDoubleReg, i.InputOperand(0));
-        __ AllocateStackSpace(kFloatSize);
-        __ Movss(Operand(esp, 0), kScratchDoubleReg);
-        frame_access_state()->IncreaseSPDelta(kFloatSize / kSystemPointerSize);
       }
+      int slots = kFloatSize / kSystemPointerSize;
+      frame_access_state()->IncreaseSPDelta(slots);
       break;
-    case kIA32PushFloat64:
-      if (instr->InputAt(0)->IsFPRegister()) {
-        __ AllocateStackSpace(kDoubleSize);
-        __ Movsd(Operand(esp, 0), i.InputDoubleRegister(0));
-        frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
-      } else if (HasImmediateInput(instr, 0)) {
-        __ Move(kScratchDoubleReg, i.InputDouble(0));
-        __ AllocateStackSpace(kDoubleSize);
+    }
+    case kIA32PushFloat64: {
+      int stack_decrement = i.InputInt32(0);
+      // 2 slot values have up to 1 slot of padding.
+      DCHECK_GE(stack_decrement, kDoubleSize);
+      if (instr->InputAt(1)->IsFPRegister()) {
+        __ AllocateStackSpace(stack_decrement);
+        __ Movsd(Operand(esp, 0), i.InputDoubleRegister(1));
+      } else if (HasImmediateInput(instr, 1)) {
+        __ Move(kScratchDoubleReg, i.InputDouble(1));
+        __ AllocateStackSpace(stack_decrement);
         __ Movsd(Operand(esp, 0), kScratchDoubleReg);
-        frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
       } else {
-        __ Movsd(kScratchDoubleReg, i.InputOperand(0));
-        __ AllocateStackSpace(kDoubleSize);
+        __ Movsd(kScratchDoubleReg, i.InputOperand(1));
+        __ AllocateStackSpace(stack_decrement);
         __ Movsd(Operand(esp, 0), kScratchDoubleReg);
-        frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
       }
+      int slots = stack_decrement / kSystemPointerSize;
+      frame_access_state()->IncreaseSPDelta(slots);
       break;
-    case kIA32PushSimd128:
-      if (instr->InputAt(0)->IsFPRegister()) {
-        __ AllocateStackSpace(kSimd128Size);
-        __ Movups(Operand(esp, 0), i.InputSimd128Register(0));
+    }
+    case kIA32PushSimd128: {
+      int stack_decrement = i.InputInt32(0);
+      // 4 slot values have up to 3 slots of padding.
+      DCHECK_GE(stack_decrement, kSimd128Size);
+      if (instr->InputAt(1)->IsFPRegister()) {
+        __ AllocateStackSpace(stack_decrement);
+        __ Movups(Operand(esp, 0), i.InputSimd128Register(1));
       } else {
-        __ Movups(kScratchDoubleReg, i.InputOperand(0));
-        __ AllocateStackSpace(kSimd128Size);
+        __ Movups(kScratchDoubleReg, i.InputOperand(1));
+        __ AllocateStackSpace(stack_decrement);
         __ Movups(Operand(esp, 0), kScratchDoubleReg);
       }
-      frame_access_state()->IncreaseSPDelta(kSimd128Size / kSystemPointerSize);
+      int slots = stack_decrement / kSystemPointerSize;
+      frame_access_state()->IncreaseSPDelta(slots);
       break;
-    case kIA32Push:
+    }
+    case kIA32Push: {
+      // TODO(bbudge) Merge the push opcodes into a single one, as on x64.
+      // 1 slot values are never padded.
+      DCHECK_EQ(i.InputInt32(0), kSystemPointerSize);
       if (HasAddressingMode(instr)) {
-        size_t index = 0;
+        size_t index = 1;
         Operand operand = i.MemoryOperand(&index);
         __ push(operand);
-        frame_access_state()->IncreaseSPDelta(kFloatSize / kSystemPointerSize);
-      } else if (instr->InputAt(0)->IsFPRegister()) {
-        __ AllocateStackSpace(kFloatSize);
-        __ Movsd(Operand(esp, 0), i.InputDoubleRegister(0));
-        frame_access_state()->IncreaseSPDelta(kFloatSize / kSystemPointerSize);
-      } else if (HasImmediateInput(instr, 0)) {
-        __ push(i.InputImmediate(0));
-        frame_access_state()->IncreaseSPDelta(1);
+      } else if (instr->InputAt(1)->IsFPRegister()) {
+        __ AllocateStackSpace(kSystemPointerSize);
+        __ Movsd(Operand(esp, 0), i.InputDoubleRegister(1));
+      } else if (HasImmediateInput(instr, 1)) {
+        __ push(i.InputImmediate(1));
       } else {
-        __ push(i.InputOperand(0));
-        frame_access_state()->IncreaseSPDelta(1);
+        __ push(i.InputOperand(1));
       }
+      frame_access_state()->IncreaseSPDelta(1);
       break;
+    }
     case kIA32Poke: {
       int slot = MiscField::decode(instr->opcode());
       if (HasImmediateInput(instr, 0)) {
diff --git a/src/compiler/backend/ia32/instruction-selector-ia32.cc b/src/compiler/backend/ia32/instruction-selector-ia32.cc
index 21389ba7a7..c99dcf88cc 100644
--- a/src/compiler/backend/ia32/instruction-selector-ia32.cc
+++ b/src/compiler/backend/ia32/instruction-selector-ia32.cc
@@ -1353,17 +1353,22 @@ void InstructionSelector::EmitPrepareArguments(
   } else {
     // Push any stack arguments.
     int effect_level = GetEffectLevel(node);
+    int stack_decrement = 0;
     for (PushParameter input : base::Reversed(*arguments)) {
-      // Skip any alignment holes in pushed nodes.
+      stack_decrement += kSystemPointerSize;
+      // Skip holes in the param array. These represent both extra slots for
+      // multi-slot values and padding slots for alignment.
       if (input.node == nullptr) continue;
+      InstructionOperand decrement = g.UseImmediate(stack_decrement);
+      stack_decrement = 0;
       if (g.CanBeMemoryOperand(kIA32Push, node, input.node, effect_level)) {
         InstructionOperand outputs[1];
-        InstructionOperand inputs[4];
+        InstructionOperand inputs[5];
         size_t input_count = 0;
-        InstructionCode opcode = kIA32Push;
+        inputs[input_count++] = decrement;
         AddressingMode mode = g.GetEffectiveAddressMemoryOperand(
             input.node, inputs, &input_count);
-        opcode |= AddressingModeField::encode(mode);
+        InstructionCode opcode = kIA32Push | AddressingModeField::encode(mode);
         Emit(opcode, 0, outputs, input_count, inputs);
       } else {
         InstructionOperand value =
@@ -1374,13 +1379,13 @@ void InstructionSelector::EmitPrepareArguments(
                   ? g.UseRegister(input.node)
                   : g.Use(input.node);
         if (input.location.GetType() == MachineType::Float32()) {
-          Emit(kIA32PushFloat32, g.NoOutput(), value);
+          Emit(kIA32PushFloat32, g.NoOutput(), decrement, value);
         } else if (input.location.GetType() == MachineType::Float64()) {
-          Emit(kIA32PushFloat64, g.NoOutput(), value);
+          Emit(kIA32PushFloat64, g.NoOutput(), decrement, value);
         } else if (input.location.GetType() == MachineType::Simd128()) {
-          Emit(kIA32PushSimd128, g.NoOutput(), value);
+          Emit(kIA32PushSimd128, g.NoOutput(), decrement, value);
         } else {
-          Emit(kIA32Push, g.NoOutput(), value);
+          Emit(kIA32Push, g.NoOutput(), decrement, value);
         }
       }
     }
diff --git a/src/compiler/backend/x64/code-generator-x64.cc b/src/compiler/backend/x64/code-generator-x64.cc
index e905c7194f..eb1dadc9e1 100644
--- a/src/compiler/backend/x64/code-generator-x64.cc
+++ b/src/compiler/backend/x64/code-generator-x64.cc
@@ -2294,59 +2294,55 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     case kX64Inc32:
       __ incl(i.OutputRegister());
       break;
-    case kX64Push:
+    case kX64Push: {
+      int stack_decrement = i.InputInt32(0);
       if (HasAddressingMode(instr)) {
-        size_t index = 0;
+        // 1 slot values are never padded.
+        DCHECK_EQ(stack_decrement, kSystemPointerSize);
+        size_t index = 1;
         Operand operand = i.MemoryOperand(&index);
         __ pushq(operand);
-        frame_access_state()->IncreaseSPDelta(1);
-        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
-                                                         kSystemPointerSize);
-      } else if (HasImmediateInput(instr, 0)) {
-        __ pushq(i.InputImmediate(0));
-        frame_access_state()->IncreaseSPDelta(1);
-        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
-                                                         kSystemPointerSize);
-      } else if (HasRegisterInput(instr, 0)) {
-        __ pushq(i.InputRegister(0));
-        frame_access_state()->IncreaseSPDelta(1);
-        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
-                                                         kSystemPointerSize);
-      } else if (instr->InputAt(0)->IsFloatRegister() ||
-                 instr->InputAt(0)->IsDoubleRegister()) {
-        // TODO(titzer): use another machine instruction?
-        __ AllocateStackSpace(kDoubleSize);
-        frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
-        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
-                                                         kDoubleSize);
-        __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
-      } else if (instr->InputAt(0)->IsSimd128Register()) {
-        // TODO(titzer): use another machine instruction?
-        __ AllocateStackSpace(kSimd128Size);
-        frame_access_state()->IncreaseSPDelta(kSimd128Size /
-                                              kSystemPointerSize);
-        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
-                                                         kSimd128Size);
-        __ Movups(Operand(rsp, 0), i.InputSimd128Register(0));
-      } else if (instr->InputAt(0)->IsStackSlot() ||
-                 instr->InputAt(0)->IsFloatStackSlot() ||
-                 instr->InputAt(0)->IsDoubleStackSlot()) {
-        __ pushq(i.InputOperand(0));
-        frame_access_state()->IncreaseSPDelta(1);
-        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
-                                                         kSystemPointerSize);
+      } else if (HasImmediateInput(instr, 1)) {
+        // 1 slot values are never padded.
+        DCHECK_EQ(stack_decrement, kSystemPointerSize);
+        __ pushq(i.InputImmediate(1));
+      } else if (HasRegisterInput(instr, 1)) {
+        // 1 slot values are never padded.
+        DCHECK_EQ(stack_decrement, kSystemPointerSize);
+        __ pushq(i.InputRegister(1));
+      } else if (instr->InputAt(1)->IsFloatRegister() ||
+                 instr->InputAt(1)->IsDoubleRegister()) {
+        // 1 slot values are never padded.
+        DCHECK_EQ(stack_decrement, kSystemPointerSize);
+        __ AllocateStackSpace(kSystemPointerSize);
+        __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
+      } else if (instr->InputAt(1)->IsSimd128Register()) {
+        // 2 slot values have up to 1 slot of padding.
+        DCHECK_GE(stack_decrement, kSimd128Size);
+        __ AllocateStackSpace(stack_decrement);
+        // TODO(bbudge) Use Movaps when slots are aligned.
+        __ Movups(Operand(rsp, 0), i.InputSimd128Register(1));
+      } else if (instr->InputAt(1)->IsStackSlot() ||
+                 instr->InputAt(1)->IsFloatStackSlot() ||
+                 instr->InputAt(1)->IsDoubleStackSlot()) {
+        // 1 slot values are never padded.
+        DCHECK_EQ(stack_decrement, kSystemPointerSize);
+        __ pushq(i.InputOperand(1));
       } else {
-        DCHECK(instr->InputAt(0)->IsSimd128StackSlot());
-        __ Movups(kScratchDoubleReg, i.InputOperand(0));
-        // TODO(titzer): use another machine instruction?
-        __ AllocateStackSpace(kSimd128Size);
-        frame_access_state()->IncreaseSPDelta(kSimd128Size /
-                                              kSystemPointerSize);
-        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
-                                                         kSimd128Size);
+        DCHECK(instr->InputAt(1)->IsSimd128StackSlot());
+        // 2 slot values have up to 1 slot of padding.
+        DCHECK_GE(stack_decrement, kSimd128Size);
+        // TODO(bbudge) Use Movaps when slots are aligned.
+        __ Movups(kScratchDoubleReg, i.InputOperand(1));
+        __ AllocateStackSpace(stack_decrement);
         __ Movups(Operand(rsp, 0), kScratchDoubleReg);
       }
+      int slots = stack_decrement / kSystemPointerSize;
+      frame_access_state()->IncreaseSPDelta(slots);
+      unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+                                                       stack_decrement);
       break;
+    }
     case kX64Poke: {
       int slot = MiscField::decode(instr->opcode());
       if (HasImmediateInput(instr, 0)) {
diff --git a/src/compiler/backend/x64/instruction-selector-x64.cc b/src/compiler/backend/x64/instruction-selector-x64.cc
index e2d8cf27bf..bcb4d10908 100644
--- a/src/compiler/backend/x64/instruction-selector-x64.cc
+++ b/src/compiler/backend/x64/instruction-selector-x64.cc
@@ -1795,29 +1795,33 @@ void InstructionSelector::EmitPrepareArguments(
   } else {
     // Push any stack arguments.
     int effect_level = GetEffectLevel(node);
+    int stack_decrement = 0;
     for (PushParameter input : base::Reversed(*arguments)) {
-      // Skip any alignment holes in pushed nodes. We may have one in case of a
-      // Simd128 stack argument.
+      stack_decrement += kSystemPointerSize;
+      // Skip holes in the param array. These represent both extra slots for
+      // multi-slot values and padding slots for alignment.
       if (input.node == nullptr) continue;
+      InstructionOperand decrement = g.UseImmediate(stack_decrement);
+      stack_decrement = 0;
      if (g.CanBeImmediate(input.node)) {
-        Emit(kX64Push, g.NoOutput(), g.UseImmediate(input.node));
+        Emit(kX64Push, g.NoOutput(), decrement, g.UseImmediate(input.node));
      } else if (IsSupported(ATOM) ||
                 sequence()->IsFP(GetVirtualRegister(input.node))) {
        // TODO(titzer): X64Push cannot handle stack->stack double moves
        // because there is no way to encode fixed double slots.
-        Emit(kX64Push, g.NoOutput(), g.UseRegister(input.node));
+        Emit(kX64Push, g.NoOutput(), decrement, g.UseRegister(input.node));
      } else if (g.CanBeMemoryOperand(kX64Push, node, input.node,
                                      effect_level)) {
        InstructionOperand outputs[1];
-        InstructionOperand inputs[4];
+        InstructionOperand inputs[5];
        size_t input_count = 0;
-        InstructionCode opcode = kX64Push;
+        inputs[input_count++] = decrement;
        AddressingMode mode = g.GetEffectiveAddressMemoryOperand(
            input.node, inputs, &input_count);
-        opcode |= AddressingModeField::encode(mode);
+        InstructionCode opcode = kX64Push | AddressingModeField::encode(mode);
        Emit(opcode, 0, outputs, input_count, inputs);
      } else {
-        Emit(kX64Push, g.NoOutput(), g.UseAny(input.node));
+        Emit(kX64Push, g.NoOutput(), decrement, g.UseAny(input.node));
      }
    }
  }
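
Note (illustrative, not part of the patch): the bookkeeping that the selectors and code generators above now share can be summarized in a small standalone C++ sketch. The selector accumulates one slot of stack decrement per parameter-array entry, folds the bytes of skipped holes into the next real value's decrement (emitted as an immediate), and the code generator splits that decrement into alignment padding plus the value's own slots. The constants use x64 values and the struct/function names below are made up for illustration only; they are not V8 APIs.

#include <cassert>
#include <cstdio>

// Illustrative constants mirroring the patch (values as on x64).
constexpr int kSystemPointerSize = 8;
constexpr int kSimd128Size = 16;

// Selector side: one entry per push, carrying the total bytes to drop the
// stack pointer by (value size plus any holes charged to this value).
struct PushOp {
  int stack_decrement;  // bytes to decrement SP by before storing the value
  int value_size;       // size of the value itself, in bytes
};

// Code-generator side: split the decrement into alignment padding plus the
// value's own storage, as the kArmPush/kX64Push handlers in this CL do.
void EmitPush(const PushOp& op) {
  assert(op.stack_decrement >= op.value_size);
  int padding = op.stack_decrement - op.value_size;
  assert(padding < kSimd128Size);  // at most 3 slots of padding for Simd128
  if (padding > 0) {
    std::printf("AllocateStackSpace(%d)\n", padding);
  }
  std::printf("store %d-byte value; SP delta += %d slots\n", op.value_size,
              op.stack_decrement / kSystemPointerSize);
}

int main() {
  // A Simd128 argument preceded by one alignment hole: the hole's slot is
  // charged to the Simd128 push, so stack_decrement = 16 + 8 = 24.
  EmitPush({kSimd128Size + kSystemPointerSize, kSimd128Size});
  // A plain pointer-sized argument: 1 slot values are never padded.
  EmitPush({kSystemPointerSize, kSystemPointerSize});
  return 0;
}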